From 39fd7b6bb128a9256346c1ff7fe23e1e914206d9 Mon Sep 17 00:00:00 2001 From: HenriqueAssumpcao Date: Mon, 9 Feb 2026 12:52:43 +0000 Subject: [PATCH] add model_id tracking --- openevolve/database.py | 5 ++- openevolve/iteration.py | 3 +- openevolve/llm/ensemble.py | 68 +++++++++++++++++++++-------- openevolve/process_parallel.py | 3 +- tests/test_llm_ensemble.py | 11 +++-- tests/test_novelty_asyncio_issue.py | 6 +-- 6 files changed, 67 insertions(+), 29 deletions(-) diff --git a/openevolve/database.py b/openevolve/database.py index eca5eab0b..a1f271dd1 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -55,6 +55,7 @@ class Program: generation: int = 0 timestamp: float = field(default_factory=time.time) iteration_found: int = 0 # Track which iteration this program was found + model_id: Optional[int] = None # Track the id of the model that generated this program # Performance metrics metrics: Dict[str, float] = field(default_factory=dict) @@ -1016,10 +1017,10 @@ def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool messages=[{"role": "user", "content": user_msg}], ), ) - content: str = future.result() + content, _model_id = future.result() except RuntimeError: # No event loop running, safe to use asyncio.run() - content: str = asyncio.run( + content, _model_id = asyncio.run( self.novelty_llm.generate_with_context( system_message=NOVELTY_SYSTEM_MSG, messages=[{"role": "user", "content": user_msg}], diff --git a/openevolve/iteration.py b/openevolve/iteration.py index 7afaff75b..68dce60ae 100644 --- a/openevolve/iteration.py +++ b/openevolve/iteration.py @@ -89,7 +89,7 @@ async def run_iteration_with_shared_db( iteration_start = time.time() # Generate code modification - llm_response = await llm_ensemble.generate_with_context( + llm_response, model_id = await llm_ensemble.generate_with_context( system_message=prompt["system"], messages=[{"role": "user", "content": prompt["user"]}], ) @@ -181,6 +181,7 @@ async def run_iteration_with_shared_db( generation=parent.generation + 1, metrics=result.child_metrics, iteration_found=iteration, + model_id=model_id, metadata={ "changes": changes_summary, "parent_metrics": parent.metrics, diff --git a/openevolve/llm/ensemble.py b/openevolve/llm/ensemble.py index e3c471673..c0b7969b3 100644 --- a/openevolve/llm/ensemble.py +++ b/openevolve/llm/ensemble.py @@ -55,39 +55,71 @@ def __init__(self, models_cfg: List[LLMModelConfig]): ) logger._ensemble_logged = True - async def generate(self, prompt: str, **kwargs) -> str: - """Generate text using a randomly selected model based on weights""" - model = self._sample_model() - return await model.generate(prompt, **kwargs) + async def generate(self, prompt: str, **kwargs) -> Tuple[str, int]: + """Generate text using a randomly selected model based on weights + + Returns: + Tuple of (generated_text, model_id) where model_id is the index + of the selected model in the ensemble + """ + model, model_id = self._sample_model() + response = await model.generate(prompt, **kwargs) + return response, model_id async def generate_with_context( self, system_message: str, messages: List[Dict[str, str]], **kwargs - ) -> str: - """Generate text using a system message and conversational context""" - model = self._sample_model() - return await model.generate_with_context(system_message, messages, **kwargs) - - def _sample_model(self) -> LLMInterface: - """Sample a model from the ensemble based on weights""" + ) -> Tuple[str, int]: + """Generate text using a system message and conversational 
+        context
+
+        Returns:
+            Tuple of (generated_text, model_id) where model_id is the index
+            of the selected model in the ensemble
+        """
+        model, model_id = self._sample_model()
+        response = await model.generate_with_context(system_message, messages, **kwargs)
+        return response, model_id
+
+    def _sample_model(self) -> Tuple[LLMInterface, int]:
+        """Sample a model from the ensemble based on weights
+
+        Returns:
+            Tuple of (model, model_id) where model_id is the index of the
+            selected model in the ensemble
+        """
         index = self.random_state.choices(range(len(self.models)), weights=self.weights, k=1)[0]
         sampled_model = self.models[index]
         logger.info(f"Sampled model: {vars(sampled_model)['model']}")
-        return sampled_model
+        return sampled_model, index
+
+    async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[Tuple[str, int]]:
+        """Generate multiple texts in parallel
 
-    async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[str]:
-        """Generate multiple texts in parallel"""
+        Returns:
+            List of (generated_text, model_id) tuples where model_id is the
+            index of the selected model in the ensemble
+        """
         tasks = [self.generate(prompt, **kwargs) for _ in range(n)]
         return await asyncio.gather(*tasks)
 
-    async def parallel_generate(self, prompts: List[str], **kwargs) -> List[str]:
-        """Generate responses for multiple prompts in parallel"""
+    async def parallel_generate(self, prompts: List[str], **kwargs) -> List[Tuple[str, int]]:
+        """Generate responses for multiple prompts in parallel
+
+        Returns:
+            List of (generated_text, model_id) tuples where model_id is the
+            index of the selected model in the ensemble
+        """
         tasks = [self.generate(prompt, **kwargs) for prompt in prompts]
         return await asyncio.gather(*tasks)
 
     async def generate_all_with_context(
         self, system_message: str, messages: List[Dict[str, str]], **kwargs
-    ) -> str:
-        """Generate text using a all available models and average their returned metrics"""
+    ) -> List[str]:
+        """Generate text using all available models and collect their responses
+
+        Returns:
+            List of generated texts, one per model in the ensemble (order matches
+            self.models). The model_id for each response is its index in the list.
+ """ responses = [] for model in self.models: responses.append(await model.generate_with_context(system_message, messages, **kwargs)) diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index a2fd6592a..bc362ec92 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -197,7 +197,7 @@ def _run_iteration_worker( # Generate code modification (sync wrapper for async) try: - llm_response = asyncio.run( + llm_response, model_id = asyncio.run( _worker_llm_ensemble.generate_with_context( system_message=prompt["system"], messages=[{"role": "user", "content": prompt["user"]}], @@ -304,6 +304,7 @@ def _run_iteration_worker( generation=parent.generation + 1, metrics=child_metrics, iteration_found=iteration, + model_id=model_id, metadata={ "changes": changes_summary, "parent_metrics": parent.metrics, diff --git a/tests/test_llm_ensemble.py b/tests/test_llm_ensemble.py index 7c11baea0..3f0c6d7c8 100644 --- a/tests/test_llm_ensemble.py +++ b/tests/test_llm_ensemble.py @@ -17,7 +17,9 @@ def test_weighted_sampling(self): ensemble = LLMEnsemble(models) # Should always sample model 'b' for _ in range(10): - self.assertEqual(ensemble._sample_model().model, "b") + model, model_id = ensemble._sample_model() + self.assertEqual(model.model, "b") + self.assertEqual(model_id, 1) models = [ LLMModelConfig(name="a", weight=0.3, api_key="test", api_base="http://test"), @@ -25,11 +27,12 @@ def test_weighted_sampling(self): LLMModelConfig(name="c", weight=0.3, api_key="test", api_base="http://test"), ] ensemble = LLMEnsemble(models) - # Should sample both models. Track sampled models in a set + # Should sample all models. Track sampled models in a set sampled_models = set() for _ in range(1000): - sampled_models.add(ensemble._sample_model().model) - # Cancel once we have both models + model, model_id = ensemble._sample_model() + sampled_models.add(model.model) + # Cancel once we have all models if len(sampled_models) == len(models): break self.assertEqual(len(sampled_models), len(models)) diff --git a/tests/test_novelty_asyncio_issue.py b/tests/test_novelty_asyncio_issue.py index 46fb03475..85ff9df59 100644 --- a/tests/test_novelty_asyncio_issue.py +++ b/tests/test_novelty_asyncio_issue.py @@ -15,11 +15,11 @@ class MockLLM: - """Mock LLM that implements the async interface""" + """Mock LLM that implements the LLMEnsemble async interface""" async def generate_with_context(self, system_message: str, messages: list): - """Mock async generate method that returns NOVEL""" - return "NOVEL" + """Mock async generate method that returns NOVEL with model_id""" + return "NOVEL", 0 class TestNoveltyAsyncioIssue(unittest.TestCase):
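
A note on the new calling convention, with a usage sketch (illustrative
only, not part of the patch): every sampling entry point on LLMEnsemble
now returns a (text, model_id) pair instead of a bare string, so any
out-of-tree caller must unpack two values. The sketch assumes
LLMModelConfig is importable from openevolve.config, reuses the
placeholder api_key/api_base values from tests/test_llm_ensemble.py,
and assumes ensemble.py has Tuple imported from typing so the new
annotations resolve.

    import asyncio

    from openevolve.config import LLMModelConfig
    from openevolve.llm.ensemble import LLMEnsemble

    async def main() -> None:
        models = [
            LLMModelConfig(name="a", weight=0.3, api_key="test", api_base="http://test"),
            LLMModelConfig(name="b", weight=0.7, api_key="test", api_base="http://test"),
        ]
        ensemble = LLMEnsemble(models)

        # Sampling alone needs no network access and shows the new tuple
        # shape: model_id is the index of the chosen model in `models`.
        model, model_id = ensemble._sample_model()
        print(f"sampled index {model_id}: {models[model_id].name}")

        # Generation (requires a reachable api_base) now returns the text
        # together with the id of the model that produced it; this is the
        # value iteration.py stores in Program.model_id.
        response, model_id = await ensemble.generate_with_context(
            system_message="You are a helpful assistant.",
            messages=[{"role": "user", "content": "Say hello."}],
        )
        print(f"model {model_id} produced {len(response)} characters")

    if __name__ == "__main__":
        asyncio.run(main())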