
Commit d9ea51d

Copilot and Mte90 committed
Use EmbeddingClient in llama_integration and remove get_embedding_for_text
- Update llama_integration.py to use EmbeddingClient instead of get_embedding_for_text
- Create a module-level _embedding_client instance in llama_integration.py
- Remove the unused get_embedding_for_text function from openai.py
- All embedding operations now use EmbeddingClient for consistent logging and retry behavior

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent f28a7ef commit d9ea51d
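
In short, call sites move from the standalone helper to the shared client. A minimal
before/after sketch, assuming only what the diffs below show (embed_text takes the
text plus file_path/chunk_index context for logging):

    # Before: standalone helper with its own retry wiring
    q_emb = get_embedding_for_text(query)

    # After: shared module-level client; file_path/chunk_index tag the log entries
    _embedding_client = EmbeddingClient()
    q_emb = _embedding_client.embed_text(query, file_path="<query>", chunk_index=0)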

File tree

2 files changed: +5 −22 lines


ai/llama_integration.py

Lines changed: 5 additions & 2 deletions
@@ -4,11 +4,14 @@
 from typing import List
 from llama_index.core import Document

-from .openai import get_embedding_for_text
+from .openai import EmbeddingClient
 from utils.logger import get_logger

 logger = get_logger(__name__)

+# Create a module-level embedding client instance
+_embedding_client = EmbeddingClient()
+

 def llama_index_retrieve_documents(query: str, database_path: str, top_k: int = 5,
                                     search_func=None, get_chunk_func=None) -> List[Document]:
@@ -28,7 +31,7 @@ def llama_index_retrieve_documents(query: str, database_path: str, top_k: int =
     if search_func is None or get_chunk_func is None:
         raise ValueError("search_func and get_chunk_func must be provided")

-    q_emb = get_embedding_for_text(query)
+    q_emb = _embedding_client.embed_text(query, file_path="<query>", chunk_index=0)
     if not q_emb:
         return []

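Standalone use of the new client follows the same pattern as the retrieval path above.
A sketch, assuming embed_text returns the embedding vector on success and a falsy value
on failure (inferred from the "if not q_emb" guard); the file name is illustrative:

    from ai.openai import EmbeddingClient

    client = EmbeddingClient()
    # file_path and chunk_index identify the source text in the client's logs.
    vector = client.embed_text("some document text", file_path="notes.md", chunk_index=0)
    if not vector:
        print("embedding failed after retries")
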
ai/openai.py

Lines changed: 0 additions & 20 deletions
@@ -296,26 +296,6 @@ def embed_multiple(self, chunks: List[str], file_path: str = "<unknown>") -> Lis
         return results


-def get_embedding_for_text(text: str, model: Optional[str] = None):
-    """
-    Return embedding vector (list[float]) using the new OpenAI client.
-    Includes rate limiting, retry logic with exponential backoff, and circuit breaker.
-    model: optional model id; if not provided, uses DEFAULT_EMBEDDING_MODEL from CFG.
-    """
-    model_to_use = model or DEFAULT_EMBEDDING_MODEL
-    if not model_to_use:
-        raise RuntimeError("No embedding model configured. Set EMBEDDING_MODEL in .env or pass model argument.")
-
-    def _get_embedding():
-        resp = _client.embeddings.create(model=model_to_use, input=text)
-        return resp.data[0].embedding
-
-    try:
-        return _retry_with_backoff(_get_embedding)
-    except Exception as e:
-        raise RuntimeError(f"Failed to obtain embedding from OpenAI client: {e}") from e
-
-
 def call_coding_api(prompt: str, model: Optional[str] = None, max_tokens: int = 1024):
     """
     Call a generative/coding model via the new OpenAI client.
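
The deleted helper's behavior presumably now lives inside EmbeddingClient, whose
implementation is not part of this diff. Below is a speculative sketch of an
equivalent embed_text that reuses the deleted function's building blocks (_client,
_retry_with_backoff, DEFAULT_EMBEDDING_MODEL) and adds the logging context the new
call sites pass; the logger call and the return-None-on-failure behavior are
assumptions, not code from this commit:

    from typing import List, Optional

    class EmbeddingClient:
        def embed_text(self, text: str, file_path: str = "<unknown>",
                       chunk_index: int = 0) -> Optional[List[float]]:
            """Sketch only: embed one chunk with retry/backoff and contextual logging."""
            model = DEFAULT_EMBEDDING_MODEL
            if not model:
                raise RuntimeError("No embedding model configured. Set EMBEDDING_MODEL in .env.")

            def _get_embedding():
                resp = _client.embeddings.create(model=model, input=text)
                return resp.data[0].embedding

            try:
                return _retry_with_backoff(_get_embedding)
            except Exception as e:
                # Assumed failure handling: log with context and return None so
                # callers can treat a falsy result as "no embedding".
                logger.error("Embedding failed for %s[%s]: %s", file_path, chunk_index, e)
                return None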
