Skip to content

Commit ba30044

Browse files
Copilot and Mte90
committed
Implement code quality improvements: unified connection and retry utilities
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 5bff056 commit ba30044

File tree

4 files changed

+288
-62
lines changed

4 files changed

+288
-62
lines changed

db/connection.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
Unified database connection utilities.
3+
Provides consistent connection management across all database operations.
4+
"""
5+
import os
6+
import sqlite3
7+
from typing import Optional
8+
from contextlib import contextmanager
9+
from utils.logger import get_logger
10+
11+
logger = get_logger(__name__)
12+
13+
14+
def get_db_connection(
    db_path: str,
    timeout: float = 30.0,
    enable_wal: bool = True,
    enable_vector: bool = False,
    row_factory: bool = True
) -> sqlite3.Connection:
    """
    Create a database connection with consistent configuration.

    Args:
        db_path: Path to the SQLite database file.
        timeout: Timeout in seconds for waiting on locks (default: 30.0).
        enable_wal: Enable Write-Ahead Logging mode (default: True).
        enable_vector: Load the sqlite-vector extension (default: False).
        row_factory: Use sqlite3.Row factory for dict-like access (default: True).

    Returns:
        sqlite3.Connection object configured for the specified operations.

    Raises:
        RuntimeError: If the vector extension fails to load when
            enable_vector=True (propagated from load_sqlite_vector_extension).
    """
    # Ensure the parent directory exists so sqlite3.connect() does not fail
    # when the database file is being created for the first time.
    dirname = os.path.dirname(os.path.abspath(db_path))
    if dirname and not os.path.isdir(dirname):
        os.makedirs(dirname, exist_ok=True)

    # check_same_thread=False: connections may be handed across threads;
    # callers are responsible for not using one connection concurrently.
    conn = sqlite3.connect(db_path, timeout=timeout, check_same_thread=False)

    if row_factory:
        conn.row_factory = sqlite3.Row

    # WAL mode improves concurrency (readers do not block the writer).
    # Best-effort: catch only sqlite errors so unrelated bugs still surface.
    if enable_wal:
        try:
            conn.execute("PRAGMA journal_mode = WAL;")
        except sqlite3.Error as e:
            logger.warning(f"Failed to enable WAL mode: {e}")

    # busy_timeout is expressed in milliseconds; keep it aligned with the
    # connect() timeout so both lock-wait knobs agree.
    try:
        conn.execute(f"PRAGMA busy_timeout = {int(timeout * 1000)};")
    except sqlite3.Error as e:
        logger.warning(f"Failed to set busy_timeout: {e}")

    # Load vector extension if requested. Imported lazily to avoid a
    # circular import between connection and vector_operations modules.
    if enable_vector:
        from .vector_operations import load_sqlite_vector_extension
        load_sqlite_vector_extension(conn)
        logger.debug(f"Vector extension loaded for connection to {db_path}")

    return conn
68+
69+
70+
@contextmanager
def db_connection(db_path: str, **kwargs):
    """
    Yield a configured SQLite connection and always close it afterwards.

    Args:
        db_path: Path to the SQLite database file.
        **kwargs: Forwarded verbatim to get_db_connection().

    Yields:
        sqlite3.Connection object.

    Example:
        with db_connection(db_path) as conn:
            cur = conn.cursor()
            cur.execute("SELECT * FROM files")
            results = cur.fetchall()
    """
    handle = get_db_connection(db_path, **kwargs)
    try:
        yield handle
    finally:
        # Closing is best-effort: a failure to close must not mask an
        # exception raised inside the `with` body.
        try:
            handle.close()
        except Exception as e:
            logger.warning(f"Error closing database connection: {e}")

db/operations.py

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from utils.config import CFG # config (keeps chunk_size etc if needed)
88
from utils.logger import get_logger
99
from utils.cache import project_cache, stats_cache, file_cache
10+
from utils.retry import retry_on_db_locked
1011
from .db_writer import get_writer
1112

1213
_LOG = get_logger(__name__)
@@ -15,24 +16,18 @@
1516
_PREPARED_STATEMENTS = {}
1617
_PREPARED_LOCK = threading.Lock()
1718

18-
import threading
1919

2020
# Simple connection helper: we open new connections per operation so the code is robust
2121
# across threads. We set WAL journal mode for safer concurrency.
2222
# Added a small timeout to avoid long blocking if DB is locked.
2323
def _get_connection(db_path: str) -> sqlite3.Connection:
    """
    Open a connection for this module's database operations.

    DEPRECATED: Use db.connection.get_db_connection() instead.
    This function is maintained for backward compatibility.
    """
    from .connection import get_db_connection

    # Use shorter timeout for web requests (5s instead of default 30s)
    short_timeout = 5.0
    return get_db_connection(db_path, timeout=short_timeout, enable_wal=True)
3631

3732

3833
def _get_prepared_statement(conn: sqlite3.Connection, query_key: str, sql: str):
@@ -392,24 +387,15 @@ def _ensure_projects_dir():
392387

393388

394389
def _retry_on_db_locked(func, *args, max_retries=DB_RETRY_COUNT, **kwargs):
    """
    Retry a database operation if it's locked.

    DEPRECATED: Use @retry_on_db_locked decorator from utils.retry instead.
    This function is maintained for backward compatibility; it merely adapts
    the old call-style API onto the shared decorator.
    """
    # Wrap the callable with the shared retry policy, then invoke it once.
    wrapped = retry_on_db_locked(max_retries=max_retries, base_delay=DB_RETRY_DELAY)(func)
    return wrapped(*args, **kwargs)
413399

414400

415401
def _get_project_id(project_path: str) -> str:

db/vector_operations.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
"""
66
import os
77
import json
8-
import time
98
import sqlite3
109
import importlib.resources
1110
from typing import List, Dict, Any, Optional
1211
from utils.logger import get_logger
12+
from utils.retry import retry_on_exception
1313

1414
logger = get_logger(__name__)
1515

@@ -18,7 +18,7 @@
1818
SQLITE_VECTOR_RESOURCE = "vector"
1919
SQLITE_VECTOR_VERSION_FN = "vector_version" # SELECT vector_version();
2020

21-
# Retry policy for DB-locked operations
21+
# Retry policy for DB-locked operations (used by insert_chunk_vector_with_retry)
2222
DB_LOCK_RETRY_COUNT = 6
2323
DB_LOCK_RETRY_BASE_DELAY = 0.05 # seconds, exponential backoff multiplier
2424

def connect_db(db_path: str, timeout: float = 30.0) -> sqlite3.Connection:
    """
    Create a database connection with appropriate timeout and settings.

    DEPRECATED: Use db.connection.get_db_connection() instead.
    This function is maintained for backward compatibility.

    Args:
        db_path: Path to the SQLite database file
        timeout: Timeout in seconds for waiting on locks

    Returns:
        sqlite3.Connection object configured for vector operations
    """
    from .connection import get_db_connection

    # The vector extension is loaded separately by the callers that need it.
    return get_db_connection(db_path, timeout=timeout, enable_vector=False)
4542

4643

4744
def load_sqlite_vector_extension(conn: sqlite3.Connection) -> None:
@@ -163,30 +160,38 @@ def insert_chunk_vector_with_retry(conn: sqlite3.Connection, file_id: int, path:
163160

164161
q_vec = json.dumps(vector)
165162

166-
attempt = 0
167-
while True:
163+
# Use retry decorator for the actual insert operation
164+
@retry_on_exception(
165+
exceptions=(sqlite3.OperationalError,),
166+
max_retries=DB_LOCK_RETRY_COUNT,
167+
base_delay=DB_LOCK_RETRY_BASE_DELAY,
168+
exponential_backoff=True
169+
)
170+
def _insert_with_retry():
171+
"""Inner function with retry logic."""
172+
# Check if it's a database locked error
168173
try:
169-
# use vector_as_f32(json) as per API so extension formats blob
170174
cur.execute("INSERT INTO chunks (file_id, path, chunk_index, embedding) VALUES (?, ?, ?, vector_as_f32(?))",
171-
(file_id, path, chunk_index, q_vec))
175+
(file_id, path, chunk_index, q_vec))
172176
conn.commit()
173177
rowid = int(cur.lastrowid)
174178
logger.debug(f"Inserted chunk vector for {path} chunk {chunk_index}, rowid={rowid}")
175179
return rowid
176180
except sqlite3.OperationalError as e:
177-
msg = str(e).lower()
178-
if "database is locked" in msg and attempt < DB_LOCK_RETRY_COUNT:
179-
attempt += 1
180-
delay = DB_LOCK_RETRY_BASE_DELAY * (2 ** (attempt - 1))
181-
logger.warning(f"Database locked, retrying in {delay}s (attempt {attempt}/{DB_LOCK_RETRY_COUNT})")
182-
time.sleep(delay)
183-
continue
184-
else:
185-
logger.error(f"Failed to insert chunk vector after {attempt} retries: {e}")
181+
# Only retry on database locked errors
182+
if "database is locked" not in str(e).lower():
183+
logger.error(f"Failed to insert chunk vector: {e}")
186184
raise RuntimeError(f"Failed to INSERT chunk vector (vector_as_f32 call): {e}") from e
185+
raise # Re-raise for retry decorator to handle
187186
except Exception as e:
188187
logger.error(f"Failed to insert chunk vector: {e}")
189188
raise RuntimeError(f"Failed to INSERT chunk vector (vector_as_f32 call): {e}") from e
189+
190+
try:
191+
return _insert_with_retry()
192+
except sqlite3.OperationalError as e:
193+
logger.error(f"Failed to insert chunk vector after {DB_LOCK_RETRY_COUNT} retries: {e}")
194+
raise RuntimeError(f"Failed to INSERT chunk vector after retries: {e}") from e
190195

191196

192197
def search_vectors(database_path: str, q_vector: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
@@ -204,10 +209,11 @@ def search_vectors(database_path: str, q_vector: List[float], top_k: int = 5) ->
204209
Raises:
205210
RuntimeError: If vector search operations fail
206211
"""
212+
from .connection import db_connection
213+
207214
logger.debug(f"Searching vectors in database: {database_path}, top_k={top_k}")
208-
conn = connect_db(database_path)
209-
try:
210-
load_sqlite_vector_extension(conn)
215+
216+
with db_connection(database_path, enable_vector=True) as conn:
211217
ensure_chunks_and_meta(conn)
212218

213219
# Ensure vector index is initialized before searching
@@ -253,8 +259,6 @@ def search_vectors(database_path: str, q_vector: List[float], top_k: int = 5) ->
253259
score = float(distance)
254260
results.append({"file_id": int(file_id), "path": path, "chunk_index": int(chunk_index), "score": score})
255261
return results
256-
finally:
257-
conn.close()
258262

259263

260264
def get_chunk_text(database_path: str, file_id: int, chunk_index: int) -> Optional[str]:
@@ -271,9 +275,9 @@ def get_chunk_text(database_path: str, file_id: int, chunk_index: int) -> Option
271275
The chunk text, or None if not found
272276
"""
273277
from .operations import get_project_metadata
278+
from .connection import db_connection
274279

275-
conn = connect_db(database_path)
276-
try:
280+
with db_connection(database_path) as conn:
277281
cur = conn.cursor()
278282
# Get file path from database
279283
cur.execute("SELECT path FROM files WHERE id = ?", (file_id,))
@@ -337,5 +341,3 @@ def get_chunk_text(database_path: str, file_id: int, chunk_index: int) -> Option
337341
start = chunk_index * step
338342
end = min(start + CHUNK_SIZE, len(content))
339343
return content[start:end]
340-
finally:
341-
conn.close()

0 commit comments

Comments
 (0)