Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import csv
import json
import logging
import os
Expand Down Expand Up @@ -479,7 +480,11 @@ def __init__(self, filename: Union[os.PathLike, str]):
self.filename = filename

def load(self) -> pd.DataFrame:
    """Read the CSV file at ``self.filename`` into a DataFrame.

    Quotation marks are kept as literal characters of the cell value, so a
    cell written as ``"test"`` is loaded as ``"test"`` (quotes included)
    rather than ``test``.

    :return: The parsed file contents, read with ``dtype=str``.
    :rtype: pd.DataFrame
    """
    # QUOTE_NONE turns off quote processing entirely; by default pandas treats
    # quotes as field delimiters and strips them from the value.
    # NOTE(review): with quoting disabled, quotes no longer protect embedded
    # commas; a literal comma inside a value has to be written as "\," and a
    # backslash in the data is consumed as the escape character.
    return pd.read_csv(
        self.filename,
        dtype=str,
        quoting=csv.QUOTE_NONE,
        escapechar="\\",
    )


class DataLoaderFactory:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
response,ground_truth
test,"test"
"quoted",quoted
start,"""end"
Original file line number Diff line number Diff line change
Expand Up @@ -1652,3 +1652,58 @@ def test_log_metrics_and_instance_results_onedp_no_redundant_tags(self, mock_cli
call_args = mock_client.start_evaluation_run.call_args
eval_upload = call_args[1]["evaluation"]
assert eval_upload.tags == tags

def test_csv_preserves_quotes_in_values(self):
"""Test that CSV loading preserves quotes in cell values.

This test validates the fix for the issue where custom code evaluators
were dropping leading and trailing quotation marks from parameter values.
The issue occurs when a CSV cell value starts AND ends with quotes.

Test CSV contains:
- Row 0: test,"test" - unquoted vs quoted value
- Row 1: "quoted",quoted - quoted vs unquoted value
- Row 2: start,"""end" - tests that even multiple quotes are preserved as literals

With QUOTE_NONE, all quotes are treated as literal characters, not delimiters.
"""
# Get the test CSV file
csv_file = _get_file("test_csv_quotes.csv")

# Define a custom evaluator that checks if quotes are preserved
def quote_checker(response: str, ground_truth: str):
"""Custom evaluator that checks if values match exactly."""
return {
"match": 1 if response == ground_truth else 0,
"response_value": response,
"ground_truth_value": ground_truth,
}

# Run evaluation with the custom evaluator
result = evaluate(
data=csv_file,
evaluators={"quote_checker": quote_checker},
)

# Verify the results
assert result is not None
row_result_df = pd.DataFrame(result["rows"])

# Check that we have the expected rows
assert len(row_result_df) == 3

# Row 0: response='test', ground_truth='"test"' - should NOT match
assert row_result_df["outputs.quote_checker.response_value"][0] == "test"
assert row_result_df["outputs.quote_checker.ground_truth_value"][0] == '"test"'
assert row_result_df["outputs.quote_checker.match"][0] == 0

# Row 1: response='"quoted"', ground_truth='quoted' - should NOT match
assert row_result_df["outputs.quote_checker.response_value"][1] == '"quoted"'
assert row_result_df["outputs.quote_checker.ground_truth_value"][1] == "quoted"
assert row_result_df["outputs.quote_checker.match"][1] == 0

# Row 2: response='start', ground_truth='"""end"' - should NOT match
# Note: With QUOTE_NONE, """end" is read as the literal string """end"
assert row_result_df["outputs.quote_checker.response_value"][2] == 'start'
assert row_result_df["outputs.quote_checker.ground_truth_value"][2] == '"""end"'
assert row_result_df["outputs.quote_checker.match"][2] == 0
Loading