From 1f2fcd3a0c21fa2e35d58b9eaa316b0bdd584fc3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 29 Dec 2025 21:22:21 +0000
Subject: [PATCH 1/3] Initial plan


From 2f063f0189ec29a7440d013ea6dc4c46ff4a4117 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 29 Dec 2025 21:30:31 +0000
Subject: [PATCH 2/3] Fix column mapping error when _use_pf_client=True with
 target function

Co-authored-by: luigiw <1483379+luigiw@users.noreply.github.com>
---
 .../ai/evaluation/_evaluate/_evaluate.py    | 14 +++++-
 .../data/test_column_mapping_pf_client.csv  |  4 ++
 .../tests/unittests/test_evaluate.py        | 46 +++++++++++++++++++
 3 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 sdk/evaluation/azure-ai-evaluation/tests/unittests/data/test_column_mapping_pf_client.csv

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index 10e96867254c..b2f3470bc66b 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -1465,13 +1465,23 @@ def get_client_type(evaluate_kwargs: Dict[str, Any]) -> Literal["run_submitter",
 
     # Update column mappings to use data references instead of run outputs
     for evaluator_name, mapping in column_mapping.items():
-        mapped_to_values = set(mapping.values())
+        # First, convert any existing ${run.outputs.} references to ${data.__outputs.}
+        # This handles user-provided column mappings that reference target outputs
+        for map_to_key, map_value in list(mapping.items()):
+            if "${run.outputs." in map_value:
+                # Extract the column name from ${run.outputs.column_name}
+                # and convert to ${data.__outputs.column_name}
+                new_value = map_value.replace("${run.outputs.", f"${{data.{Prefixes.TSG_OUTPUTS}")
+                column_mapping[evaluator_name][map_to_key] = new_value
+        
+        # Then, add auto-generated mappings for target columns not explicitly mapped
+        mapped_to_values = set(column_mapping[evaluator_name].values())
         for col in target_generated_columns:
             # Use data reference instead of run output to ensure we get all rows
             target_reference = f"${{data.{Prefixes.TSG_OUTPUTS}{col}}}"
 
             # We will add our mapping only if customer did not map target output.
-            if col not in mapping and target_reference not in mapped_to_values:
+            if col not in column_mapping[evaluator_name] and target_reference not in mapped_to_values:
                 column_mapping[evaluator_name][col] = target_reference
 
     # Don't pass the target_run since we're now using the complete dataframe
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/test_column_mapping_pf_client.csv b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/test_column_mapping_pf_client.csv
new file mode 100644
index 000000000000..7617893be081
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/test_column_mapping_pf_client.csv
@@ -0,0 +1,4 @@
+query
+hello
+world
+test
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
index e110eb369369..a0de13459e85 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
@@ -1652,3 +1652,49 @@ def test_log_metrics_and_instance_results_onedp_no_redundant_tags(self, mock_cli
         call_args = mock_client.start_evaluation_run.call_args
         eval_upload = call_args[1]["evaluation"]
         assert eval_upload.tags == tags
+
+    def test_column_mapping_with_target_and_pf_client(self, mock_model_config):
+        """Test that column mapping works correctly when using _use_pf_client=True with a target function.
+
+        This test validates the fix for the issue where user-provided column mappings with ${target.}
+        references were not being converted to ${data.__outputs.} format when using ProxyClient.
+        """
+        # Create a simple CSV test file
+        test_data_file = _get_file("test_column_mapping_pf_client.csv")
+
+        # Define a simple target function that returns a response
+        def simple_target(query):
+            return {"response": f"Response to: {query}"}
+
+        # Define an evaluator that requires both query and response
+        def simple_evaluator(query, response):
+            return {"score": len(response)}
+
+        # Test with column mapping that uses ${target.response}
+        result = evaluate(
+            data=test_data_file,
+            target=simple_target,
+            evaluators={"test": simple_evaluator},
+            evaluator_config={
+                "default": {
+                    "column_mapping": {
+                        "query": "${data.query}",
+                        "response": "${target.response}",
+                    }
+                }
+            },
+            _use_pf_client=True,
+        )
+
+        # Verify the evaluation completed successfully
+        assert result is not None
+        assert "rows" in result
+        assert len(result["rows"]) > 0
+
+        # Verify that the evaluator ran and produced scores
+        row_df = pd.DataFrame(result["rows"])
+        assert "outputs.test.score" in row_df.columns
+
+        # Verify that all rows have scores (no NaN values from column mapping errors)
+        scores = row_df["outputs.test.score"]
+        assert not scores.isna().any(), "Some evaluations failed due to column mapping errors"

From 9e5957257a3f492be0d1c5fda834e46072b55b00 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 29 Dec 2025 21:33:53 +0000
Subject: [PATCH 3/3] Remove trailing whitespace

Co-authored-by: luigiw <1483379+luigiw@users.noreply.github.com>
---
 .../azure/ai/evaluation/_evaluate/_evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index b2f3470bc66b..eb69de087380 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -1473,7 +1473,7 @@ def get_client_type(evaluate_kwargs: Dict[str, Any]) -> Literal["run_submitter",
                 # and convert to ${data.__outputs.column_name}
                 new_value = map_value.replace("${run.outputs.", f"${{data.{Prefixes.TSG_OUTPUTS}")
                 column_mapping[evaluator_name][map_to_key] = new_value
-        
+
         # Then, add auto-generated mappings for target columns not explicitly mapped
         mapped_to_values = set(column_mapping[evaluator_name].values())
         for col in target_generated_columns:
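
Note on PATCH 2: the fix has two parts. It first rewrites any user-supplied "${run.outputs.<col>}" references into "${data.__outputs.<col>}" references, so evaluators read target outputs from the merged dataframe rather than from the target run (per the test docstring, mappings written as "${target.<col>}" appear to reach this code already normalized to "${run.outputs.<col>}"), and it then auto-maps any remaining target-generated columns. The short sketch below isolates the first part only. It assumes Prefixes.TSG_OUTPUTS resolves to "__outputs." (the same prefix the test docstring uses), and the helper name convert_run_references is hypothetical; this is an illustration of the string rewrite, not the SDK implementation.

    # Minimal standalone sketch of the PATCH 2 conversion step, assuming
    # Prefixes.TSG_OUTPUTS == "__outputs.". The helper name is hypothetical
    # and does not exist in azure-ai-evaluation.
    from typing import Dict

    TSG_OUTPUTS = "__outputs."  # assumed value of Prefixes.TSG_OUTPUTS

    def convert_run_references(mapping: Dict[str, str]) -> Dict[str, str]:
        """Rewrite ${run.outputs.<col>} references as ${data.__outputs.<col>} references."""
        converted = {}
        for key, value in mapping.items():
            if "${run.outputs." in value:
                # e.g. "${run.outputs.response}" -> "${data.__outputs.response}"
                value = value.replace("${run.outputs.", f"${{data.{TSG_OUTPUTS}")
            converted[key] = value
        return converted

    if __name__ == "__main__":
        user_mapping = {"query": "${data.query}", "response": "${run.outputs.response}"}
        print(convert_run_references(user_mapping))
        # {'query': '${data.query}', 'response': '${data.__outputs.response}'}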