Skip to content

Commit 0d5bae1

Browse files
Copilot and Mte90 committed
Refactor: use named constant for progress interval and math.ceil
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 75acf3f commit 0d5bae1

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

ai/analyzer.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import sqlite3
66
import importlib.resources
77
import hashlib
8+
import math
89
from pathlib import Path
910
from typing import Optional, Dict, Any, List
1011

@@ -44,6 +45,7 @@
4445
EMBEDDING_CONCURRENCY = 4
4546
# Increase batch size for parallel processing
4647
EMBEDDING_BATCH_SIZE = 16 # Process embeddings in batches for better throughput
48+
PROGRESS_LOG_INTERVAL = 10 # Log progress every N completed files
4749
_THREADPOOL_WORKERS = max(16, EMBEDDING_CONCURRENCY + 8)
4850
_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_THREADPOOL_WORKERS)
4951

@@ -418,7 +420,7 @@ def _process_file_sync(
418420
chunk_tasks.append((idx, chunk_doc))
419421

420422
# Process embeddings in parallel batches for better throughput
421-
num_batches = (len(chunk_tasks) + EMBEDDING_BATCH_SIZE - 1) // EMBEDDING_BATCH_SIZE
423+
num_batches = math.ceil(len(chunk_tasks) / EMBEDDING_BATCH_SIZE)
422424
for batch_num, batch_start in enumerate(range(0, len(chunk_tasks), EMBEDDING_BATCH_SIZE), 1):
423425
batch = chunk_tasks[batch_start:batch_start + EMBEDDING_BATCH_SIZE]
424426

@@ -571,7 +573,7 @@ def analyze_local_path_sync(
571573
with counters[2]:
572574
counters[1] += 1
573575
completed_count = counters[1]
574-
should_log = completed_count % 10 == 0
576+
should_log = completed_count % PROGRESS_LOG_INTERVAL == 0
575577

576578
if isinstance(r, dict):
577579
if r.get("stored"):

0 commit comments

Comments (0)