@@ -1465,13 +1465,23 @@ def get_client_type(evaluate_kwargs: Dict[str, Any]) -> Literal["run_submitter",

         # Update column mappings to use data references instead of run outputs
         for evaluator_name, mapping in column_mapping.items():
-            mapped_to_values = set(mapping.values())
+            # First, convert any existing ${run.outputs.} references to ${data.__outputs.}
+            # This handles user-provided column mappings that reference target outputs
+            for map_to_key, map_value in list(mapping.items()):
+                if "${run.outputs." in map_value:
+                    # Extract the column name from ${run.outputs.column_name}
+                    # and convert to ${data.__outputs.column_name}
+                    new_value = map_value.replace("${run.outputs.", f"${{data.{Prefixes.TSG_OUTPUTS}")
+                    column_mapping[evaluator_name][map_to_key] = new_value
+
+            # Then, add auto-generated mappings for target columns not explicitly mapped
+            mapped_to_values = set(column_mapping[evaluator_name].values())
             for col in target_generated_columns:
                 # Use data reference instead of run output to ensure we get all rows
                 target_reference = f"${{data.{Prefixes.TSG_OUTPUTS}{col}}}"

                 # We will add our mapping only if customer did not map target output.
-                if col not in mapping and target_reference not in mapped_to_values:
+                if col not in column_mapping[evaluator_name] and target_reference not in mapped_to_values:
                     column_mapping[evaluator_name][col] = target_reference

         # Don't pass the target_run since we're now using the complete dataframe
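Note: a minimal, self-contained sketch of the reference rewrite introduced above. It assumes Prefixes.TSG_OUTPUTS resolves to the "__outputs." prefix implied by the ${data.__outputs.} wording in the new test's docstring; the names and values below are illustrative, not taken from the production module.

    TSG_OUTPUTS = "__outputs."  # assumed value of Prefixes.TSG_OUTPUTS

    column_mapping = {
        "test": {
            "query": "${data.query}",
            "response": "${run.outputs.response}",  # previously left pointing at the target run
        }
    }

    # Same prefix rewrite as the added code: ${run.outputs.x} -> ${data.__outputs.x}
    for evaluator_name, mapping in column_mapping.items():
        for map_to_key, map_value in list(mapping.items()):
            if "${run.outputs." in map_value:
                mapping[map_to_key] = map_value.replace("${run.outputs.", f"${{data.{TSG_OUTPUTS}")

    assert column_mapping["test"]["response"] == "${data.__outputs.response}"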
@@ -0,0 +1,4 @@
+query
+hello
+world
+test
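For context, a hypothetical sketch of how this fixture's rows would be combined with target outputs before evaluation, assuming target outputs are appended to the input dataframe under the "__outputs." prefix referenced in the mapping code above; the merge shown here is illustrative, not the library's actual implementation.

    import pandas as pd

    # Rows from the fixture above (header "query", three values).
    data = pd.DataFrame({"query": ["hello", "world", "test"]})

    # The new test's target returns {"response": f"Response to: {query}"}; its
    # outputs would sit alongside the inputs under the __outputs. prefix.
    data["__outputs.response"] = data["query"].map(lambda q: f"Response to: {q}")

    # Evaluators can then resolve ${data.query} and ${data.__outputs.response}
    # from this single dataframe, row by row.
    print(data)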
@@ -1652,3 +1652,49 @@ def test_log_metrics_and_instance_results_onedp_no_redundant_tags(self, mock_cli
         call_args = mock_client.start_evaluation_run.call_args
         eval_upload = call_args[1]["evaluation"]
         assert eval_upload.tags == tags
+
+    def test_column_mapping_with_target_and_pf_client(self, mock_model_config):
+        """Test that column mapping works correctly when using _use_pf_client=True with a target function.
+
+        This test validates the fix for the issue where user-provided column mappings with ${target.}
+        references were not being converted to ${data.__outputs.} format when using ProxyClient.
+        """
+        # Create a simple CSV test file
+        test_data_file = _get_file("test_column_mapping_pf_client.csv")
+
+        # Define a simple target function that returns a response
+        def simple_target(query):
+            return {"response": f"Response to: {query}"}
+
+        # Define an evaluator that requires both query and response
+        def simple_evaluator(query, response):
+            return {"score": len(response)}
+
+        # Test with column mapping that uses ${target.response}
+        result = evaluate(
+            data=test_data_file,
+            target=simple_target,
+            evaluators={"test": simple_evaluator},
+            evaluator_config={
+                "default": {
+                    "column_mapping": {
+                        "query": "${data.query}",
+                        "response": "${target.response}",
+                    }
+                }
+            },
+            _use_pf_client=True,
+        )
+
+        # Verify the evaluation completed successfully
+        assert result is not None
+        assert "rows" in result
+        assert len(result["rows"]) > 0
+
+        # Verify that the evaluator ran and produced scores
+        row_df = pd.DataFrame(result["rows"])
+        assert "outputs.test.score" in row_df.columns
+
+        # Verify that all rows have scores (no NaN values from column mapping errors)
+        scores = row_df["outputs.test.score"]
+        assert not scores.isna().any(), "Some evaluations failed due to column mapping errors"
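To summarize the behavior this test pins down, a small sketch of the mapping-reference lifecycle; the intermediate ${run.outputs.} step assumes the pre-existing ${target.} rewrite performed earlier in the evaluate pipeline, as described in the docstring above.

    user_mapping = "${target.response}"               # what the caller writes in evaluator_config
    after_target_rewrite = "${run.outputs.response}"  # assumed result of the existing ${target.} rewrite
    after_this_fix = after_target_rewrite.replace("${run.outputs.", "${data.__outputs.")

    assert after_this_fix == "${data.__outputs.response}"  # the form evaluators now receive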