@@ -970,7 +970,9 @@ def evaluate(
970  970          self,
971  971          *,
972  972          dataset: Union[
973       -          types.EvaluationDatasetOrDict, list[types.EvaluationDatasetOrDict]
     973  +          pd.DataFrame,
     974  +          types.EvaluationDatasetOrDict,
     975  +          list[types.EvaluationDatasetOrDict],
974  976          ],
975  977          metrics: list[types.MetricOrDict] = None,
976  978          config: Optional[types.EvaluateMethodConfigOrDict] = None,
@@ -979,10 +981,13 @@ def evaluate(
979  981          """Evaluates candidate responses in the provided dataset(s) using the specified metrics.
980  982  
981  983          Args:
982       -          dataset: The dataset(s) to evaluate. Can be a single `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
     984  +          dataset: The dataset(s) to evaluate. Can be a pandas DataFrame, a single
     985  +            `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
983  986            metrics: The list of metrics to use for evaluation.
984       -          config: Optional configuration for the evaluation. Can be a dictionary or a `types.EvaluateMethodConfig` object.
985       -            - dataset_schema: Schema to use for the dataset. If not specified, the dataset schema will be inferred from the dataset automatically.
     987  +          config: Optional configuration for the evaluation. Can be a dictionary or a
     988  +            `types.EvaluateMethodConfig` object.
     989  +            - dataset_schema: Schema to use for the dataset. If not specified, the
     990  +              dataset schema will be inferred from the dataset automatically.
986  991              - dest: Destination path for storing evaluation results.
987  992            **kwargs: Extra arguments to pass to evaluation, such as `agent_info`.
988  993  
@@ -993,6 +998,10 @@ def evaluate(
993   998          config = types.EvaluateMethodConfig()
994   999          if isinstance(config, dict):
995  1000              config = types.EvaluateMethodConfig.model_validate(config)
     1001  +
     1002  +        if isinstance(dataset, pd.DataFrame):
     1003  +            dataset = types.EvaluationDataset(eval_dataset_df=dataset)
     1004  +
996  1005          if isinstance(dataset, list):
997  1006              dataset = [
998  1007                  (
0 commit comments