@@ -173,27 +173,33 @@ def init_db(database_path: str) -> None:
173173 """
174174 Initialize database schema. Safe to call multiple times.
175175 Creates:
176- - files (stores full content of indexed files)
176+ - files (stores full content of indexed files with metadata for incremental indexing )
177177 - chunks (with embedding BLOB column for sqlite-vector)
178+ - project_metadata (project-level tracking)
178179 """
179180 conn = _get_connection (database_path )
180181 try :
181182 cur = conn .cursor ()
182183
183184 # files table (stores full content, used to reconstruct chunks)
185+ # Added last_modified and file_hash for incremental indexing
184186 cur .execute (
185187 """
186188 CREATE TABLE IF NOT EXISTS files (
187189 id INTEGER PRIMARY KEY AUTOINCREMENT,
188- path TEXT NOT NULL,
190+ path TEXT NOT NULL UNIQUE ,
189191 content TEXT,
190192 language TEXT,
191193 snippet TEXT,
192- created_at TEXT DEFAULT (datetime('now'))
194+ last_modified REAL,
195+ file_hash TEXT,
196+ created_at TEXT DEFAULT (datetime('now')),
197+ updated_at TEXT DEFAULT (datetime('now'))
193198 )
194199 """
195200 )
196201 cur .execute ("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);" )
202+ cur .execute ("CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash);" )
197203
198204 # chunks table: metadata for chunked documents; includes embedding BLOB column
199205 cur .execute (
@@ -210,20 +216,43 @@ def init_db(database_path: str) -> None:
210216 """
211217 )
212218 cur .execute ("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);" )
219+
220+ # project_metadata table for project-level tracking
221+ cur .execute (
222+ """
223+ CREATE TABLE IF NOT EXISTS project_metadata (
224+ key TEXT PRIMARY KEY,
225+ value TEXT,
226+ updated_at TEXT DEFAULT (datetime('now'))
227+ )
228+ """
229+ )
230+
213231 conn .commit ()
214232 finally :
215233 conn .close ()
216234
217235
218- def store_file (database_path , path , content , language ):
236+ def store_file (database_path , path , content , language , last_modified = None , file_hash = None ):
219237 """
220- Insert a file record into the DB using a queued single-writer to avoid
238+ Insert or update a file record into the DB using a queued single-writer to avoid
221239 sqlite 'database is locked' errors in multithreaded scenarios.
240+ Supports incremental indexing with last_modified and file_hash tracking.
222241 Returns lastrowid (same as the previous store_file implementation).
223242 """
224243 snippet = (content [:512 ] if content else "" )
225- sql = "INSERT INTO files (path, content, language, snippet) VALUES (?, ?, ?, ?)"
226- params = (path , content , language , snippet )
244+ sql = """
245+ INSERT INTO files (path, content, language, snippet, last_modified, file_hash, updated_at)
246+ VALUES (?, ?, ?, ?, ?, ?, datetime('now'))
247+ ON CONFLICT(path) DO UPDATE SET
248+ content=excluded.content,
249+ language=excluded.language,
250+ snippet=excluded.snippet,
251+ last_modified=excluded.last_modified,
252+ file_hash=excluded.file_hash,
253+ updated_at=datetime('now')
254+ """
255+ params = (path , content , language , snippet , last_modified , file_hash )
227256
228257 writer = _get_writer (database_path )
229258 # We wait for the background writer to complete the insert and then return the row id.
@@ -314,6 +343,106 @@ def clear_project_data(database_path: str) -> None:
314343 conn .close ()
315344
316345
def get_file_by_path(database_path: str, path: str) -> Optional[Dict[str, Any]]:
    """
    Look up a file's indexing metadata by its path.

    Used by incremental indexing to decide whether a file has changed.
    Returns a dict with ``id``, ``path``, ``last_modified`` and
    ``file_hash``, or None when no row matches *path*.
    """
    conn = _get_connection(database_path)
    try:
        cur = conn.execute(
            "SELECT id, path, last_modified, file_hash FROM files WHERE path = ?",
            (path,),
        )
        row = cur.fetchone()
        if row is None:
            return None
        # NOTE(review): relies on _get_connection configuring a mapping row
        # factory (e.g. sqlite3.Row) so columns are addressable by name.
        return {col: row[col] for col in ("id", "path", "last_modified", "file_hash")}
    finally:
        conn.close()
367+
368+
def needs_reindex(database_path: str, path: str, current_mtime: float, current_hash: str) -> bool:
    """
    Decide whether *path* must be (re-)indexed.

    A file needs indexing when it has never been stored, when its stored
    metadata is incomplete (row predates incremental-indexing columns),
    or when either its mtime or content hash differs from the recorded
    values. Returns True in any of those cases, False otherwise.
    """
    record = get_file_by_path(database_path, path)
    if record is None:
        # Never indexed before.
        return True

    stored_mtime = record["last_modified"]
    stored_hash = record["file_hash"]
    if stored_mtime is None or stored_hash is None:
        # Missing tracking metadata — treat as stale to be safe.
        return True

    # Stale if either change signal disagrees with the current file state.
    return stored_mtime != current_mtime or stored_hash != current_hash
386+
387+
def set_project_metadata(database_path: str, key: str, value: str) -> None:
    """
    Upsert a single key/value pair into project_metadata.

    An existing key has its value and ``updated_at`` refreshed; a new
    key is inserted. Commits before returning.
    """
    upsert_sql = """
        INSERT INTO project_metadata (key, value, updated_at)
        VALUES (?, ?, datetime('now'))
        ON CONFLICT(key) DO UPDATE SET
            value=excluded.value,
            updated_at=datetime('now')
    """
    conn = _get_connection(database_path)
    try:
        conn.cursor().execute(upsert_sql, (key, value))
        conn.commit()
    finally:
        conn.close()
408+
409+
def get_project_metadata(database_path: str, key: str) -> Optional[str]:
    """
    Fetch the value stored under *key* in project_metadata.

    Returns None when the key is absent.
    """
    conn = _get_connection(database_path)
    try:
        cur = conn.execute(
            "SELECT value FROM project_metadata WHERE key = ?",
            (key,),
        )
        row = cur.fetchone()
        return None if row is None else row["value"]
    finally:
        conn.close()
423+
424+
def delete_file_by_path(database_path: str, path: str) -> None:
    """
    Delete a file row and all of its chunks, identified by path.

    Used by incremental indexing to purge files that were removed from
    disk. A no-op when *path* is not in the DB.

    NOTE(review): unlike store_file, this writes directly instead of
    going through the single-writer queue, so it could hit sqlite
    'database is locked' under concurrent writes — confirm callers run
    deletions outside the indexing thread pool.
    """
    conn = _get_connection(database_path)
    try:
        cur = conn.cursor()
        # Delete child rows first (no FK cascade is declared on
        # chunks.file_id). The subquery replaces the previous
        # SELECT-then-DELETE round trip; both deletes share one implicit
        # transaction, committed together below.
        cur.execute(
            "DELETE FROM chunks WHERE file_id IN (SELECT id FROM files WHERE path = ?)",
            (path,),
        )
        cur.execute("DELETE FROM files WHERE path = ?", (path,))
        conn.commit()
    finally:
        conn.close()
444+
445+
317446# ============================================================================
318447# Project Registry Database Operations
319448# ============================================================================
0 commit comments