Skip to content

Commit 4867684

Browse files
Copilot and Mte90 authored
Fix embedding timeout caused by thread pool deadlock (#14)
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 4b06603 commit 4867684

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

ai/analyzer.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,15 @@
5252
# Increase batch size for parallel processing
5353
EMBEDDING_BATCH_SIZE = 16 # Process embeddings in batches for better throughput
5454
PROGRESS_LOG_INTERVAL = 10 # Log progress every N completed files
55-
EMBEDDING_TIMEOUT = 30 # Timeout in seconds for each embedding API call
55+
# Timeout for future.result() must account for retries: (max_retries + 1) × SDK_timeout + buffer
56+
# With an SDK timeout of 15s and max_retries=2: 3 attempts × 15s = 45s, plus a 15s buffer, gives 60s
57+
EMBEDDING_TIMEOUT = 60 # Timeout in seconds for each embedding API call (including retries)
5658
FILE_PROCESSING_TIMEOUT = 300 # Timeout in seconds for processing a single file (5 minutes)
57-
_THREADPOOL_WORKERS = max(16, EMBEDDING_CONCURRENCY + 8)
58-
_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_THREADPOOL_WORKERS)
59+
60+
_FILE_EXECUTOR_WORKERS = 4
61+
_EMBEDDING_EXECUTOR_WORKERS = 4
62+
_FILE_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_FILE_EXECUTOR_WORKERS)
63+
_EMBEDDING_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_EMBEDDING_EXECUTOR_WORKERS)
5964

6065
logger = get_logger(__name__)
6166

@@ -217,7 +222,7 @@ def _process_file_sync(
217222
for idx, chunk_doc in batch:
218223
# Submit task to executor; semaphore will be acquired inside the worker
219224
embedding_start_time = time.time()
220-
future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model)
225+
future = _EMBEDDING_EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model)
221226
embedding_futures.append((idx, chunk_doc, future, embedding_start_time))
222227

223228
# Wait for batch to complete and store results
@@ -397,7 +402,7 @@ def analyze_local_path_sync(
397402
counters[0] += 1
398403
file_num = counters[0]
399404

400-
fut = _EXECUTOR.submit(
405+
fut = _FILE_EXECUTOR.submit(
401406
_process_file_sync,
402407
semaphore,
403408
database_path,

ai/openai.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ def __init__(self,
122122
api_url: Optional[str] = None,
123123
api_key: Optional[str] = None,
124124
model: Optional[str] = None,
125-
timeout: float = 30.0,
125+
timeout: float = 15.0,
126126
max_retries: int = 2,
127127
backoff: float = 1.5):
128128
self.api_url = api_url or CFG.get("api_url")

0 commit comments

Comments
 (0)