Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
dependencies = [
"uipath-core>=0.1.4, <0.2.0",
"uipath-runtime>=0.3.1, <0.4.0",
"uipath-runtime>=0.3.4, <0.4.0",
"click>=8.3.1",
"httpx>=0.28.1",
"pyjwt>=2.10.1",
Expand Down
167 changes: 0 additions & 167 deletions src/uipath/_cli/_evals/_configurable_factory.py

This file was deleted.

52 changes: 31 additions & 21 deletions src/uipath/_cli/_evals/_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
from ...eval.models.models import AgentExecution, EvalItemResult
from .._utils._eval_set import EvalHelpers
from .._utils._parallelization import execute_parallel
from ._configurable_factory import ConfigurableRuntimeFactory
from ._evaluator_factory import EvaluatorFactory
from ._models._evaluation_set import (
EvaluationItem,
Expand Down Expand Up @@ -199,8 +198,7 @@ def __init__(
event_bus: EventBus,
):
self.context: UiPathEvalContext = context
# Wrap the factory to support model settings overrides
self.factory = ConfigurableRuntimeFactory(factory)
self.factory: UiPathRuntimeFactoryProtocol = factory
self.event_bus: EventBus = event_bus
self.trace_manager: UiPathTraceManager = trace_manager
self.span_exporter: ExecutionSpanExporter = ExecutionSpanExporter()
Expand All @@ -225,10 +223,6 @@ async def __aexit__(self, *args: Any) -> None:
self.coverage.stop()
self.coverage.report(include=["./*"], show_missing=True)

# Clean up any temporary files created by the factory
if hasattr(self.factory, "dispose"):
await self.factory.dispose()

async def get_schema(self, runtime: UiPathRuntimeProtocol) -> UiPathRuntimeSchema:
schema = await runtime.get_schema()
if schema is None:
Expand Down Expand Up @@ -290,9 +284,6 @@ async def initiate_evaluation(
)

async def execute(self) -> UiPathRuntimeResult:
# Configure model settings override before creating runtime
await self._configure_model_settings_override()

runtime = await self.factory.new_runtime(
entrypoint=self.context.entrypoint or "",
runtime_id=self.execution_id,
Comment on lines 286 to 289

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Honor model settings override when creating runtime

The execute() path now constructs the runtime directly (factory.new_runtime at lines 286‑289) but no longer applies the requested model settings override. _get_model_settings_override() (lines 554‑601) is never invoked, so the --model-settings-id option wired through cli_eval is a no-op and evaluation runs always use the agent’s default settings. Users selecting a non‑default model/temperature in the evaluation set will silently execute against the wrong model.

Useful? React with 👍 / 👎.

Expand Down Expand Up @@ -560,14 +551,21 @@ def _get_and_clear_execution_data(

return spans, logs

async def _configure_model_settings_override(self) -> None:
"""Configure the factory with model settings override if specified."""
# Skip if no model settings ID specified
def _get_model_settings_override(
self,
) -> dict[str, Any] | None:
"""Get model settings override from evaluation set if specified.

Returns:
Model settings dict to use for override, or None if using defaults.
Settings are passed via schema.metadata to the runtime.
"""
# Skip if no model settings ID specified or using default
if (
not self.context.model_settings_id
or self.context.model_settings_id == "default"
):
return
return None

# Load evaluation set to get model settings
evaluation_set, _ = EvalHelpers.load_eval_set(self.context.eval_set or "")
Expand All @@ -576,7 +574,7 @@ async def _configure_model_settings_override(self) -> None:
or not evaluation_set.model_settings
):
logger.warning("No model settings available in evaluation set")
return
return None

# Find the specified model settings
target_model_settings = next(
Expand All @@ -592,15 +590,15 @@ async def _configure_model_settings_override(self) -> None:
logger.warning(
f"Model settings ID '{self.context.model_settings_id}' not found in evaluation set"
)
return
return None

logger.info(
f"Configuring model settings override: id='{target_model_settings.id}', "
f"Using model settings override: id='{target_model_settings.id}', "
f"model='{target_model_settings.model}', temperature='{target_model_settings.temperature}'"
)

# Configure the factory with the override settings
self.factory.set_model_settings_override(target_model_settings)
# Return settings as dict for schema.metadata override
return target_model_settings.model_dump(exclude_none=True)

async def execute_runtime(
self,
Expand Down Expand Up @@ -687,15 +685,27 @@ async def run_evaluator(
return result

async def _get_agent_model(self, runtime: UiPathRuntimeProtocol) -> str | None:
"""Get agent model from the runtime.
"""Get agent model from the runtime schema metadata.

The model is read from schema.metadata["settings"]["model"] which is
populated by the low-code agents runtime from agent.json.

Returns:
The model name from agent settings, or None if not found.
"""
try:
schema = await self.get_schema(runtime)
if schema.metadata and "settings" in schema.metadata:
settings = schema.metadata["settings"]
model = settings.get("model")
if model:
logger.debug(f"Got agent model from schema.metadata: {model}")
return model

# Fallback to protocol-based approach for backwards compatibility
model = self._find_agent_model_in_runtime(runtime)
if model:
logger.debug(f"Got agent model from runtime: {model}")
logger.debug(f"Got agent model from runtime protocol: {model}")
return model
except Exception:
return None
Expand Down
Loading