diff --git a/openevolve/database.py b/openevolve/database.py
index eca5eab0b..55047b6de 100644
--- a/openevolve/database.py
+++ b/openevolve/database.py
@@ -861,6 +861,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
                 # Use code length as complexity measure
                 complexity = len(program.code)
                 bin_idx = self._calculate_complexity_bin(complexity)
+                program.complexity = bin_idx  # Store complexity bin in program
                 coords.append(bin_idx)
             elif dim == "diversity":
                 # Use cached diversity calculation with reference set
@@ -869,6 +870,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
                 else:
                     diversity = self._get_cached_diversity(program)
                 bin_idx = self._calculate_diversity_bin(diversity)
+                program.diversity = bin_idx  # Store diversity bin in program
                 coords.append(bin_idx)
             elif dim == "score":
                 # Use average of numeric metrics
diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py
index b1142ece5..582701954 100644
--- a/openevolve/evaluator.py
+++ b/openevolve/evaluator.py
@@ -208,9 +208,10 @@ async def evaluate_program(
                     if "combined_score" in eval_result.metrics:
                         # Original combined_score is just accuracy
                         accuracy = eval_result.metrics["combined_score"]
-                        # Combine with LLM average (70% accuracy, 30% LLM quality)
+                        # Combine accuracy with LLM average using configurable weighting:
+                        # (1 - llm_feedback_weight) * accuracy + llm_feedback_weight * LLM quality
                         eval_result.metrics["combined_score"] = (
-                            accuracy * 0.7 + llm_average * 0.3
+                            accuracy * (1 - self.config.llm_feedback_weight) + llm_average * self.config.llm_feedback_weight
                         )
 
                 # Store artifacts if enabled and present
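
For reviewers, a minimal sketch of what the `database.py` hunks change, assuming `Program` is a mutable object with `complexity` and `diversity` attributes (the binning helper below is a hypothetical stand-in for `Database._calculate_complexity_bin`): the bin index computed for each MAP-Elites feature dimension is now also stored on the program itself, so downstream consumers can read it back without recomputing.

```python
from dataclasses import dataclass
from typing import List


@dataclass
class Program:
    code: str
    complexity: int = 0  # MAP-Elites complexity bin, set during coord calculation
    diversity: int = 0   # MAP-Elites diversity bin, set during coord calculation


def calculate_complexity_bin(complexity: int, bin_size: int = 100, num_bins: int = 10) -> int:
    # Hypothetical stand-in for Database._calculate_complexity_bin
    return min(complexity // bin_size, num_bins - 1)


def calculate_feature_coords(program: Program) -> List[int]:
    coords: List[int] = []
    bin_idx = calculate_complexity_bin(len(program.code))
    program.complexity = bin_idx  # side effect introduced by this patch
    coords.append(bin_idx)
    return coords
```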
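
The `evaluator.py` hunk replaces the hard-coded 70/30 blend with a weight read from config. A minimal sketch of the formula, assuming `llm_feedback_weight` lies in [0, 1] (the field name comes from the diff itself); setting it to 0.3 reproduces the old behavior:

```python
def combine_score(accuracy: float, llm_average: float, llm_feedback_weight: float) -> float:
    """Blend evaluator accuracy with averaged LLM feedback quality."""
    return accuracy * (1 - llm_feedback_weight) + llm_average * llm_feedback_weight


# With the old default weight of 0.3: 0.7 * 0.9 + 0.3 * 0.6 = 0.81
assert abs(combine_score(0.9, 0.6, 0.3) - 0.81) < 1e-9
```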