Skip to content

Commit d24bad4

Browse files
Copilot and Mte90 committed
Address code review feedback and improve documentation
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 4057109 commit d24bad4

File tree

4 files changed

+28
-55
lines changed

4 files changed

+28
-55
lines changed

analyzer.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -324,10 +324,11 @@ def _process_file_sync(
324324
if isinstance(cfg, dict):
325325
embedding_model = cfg.get("embedding_model")
326326

327-
# Use smart chunking for code files, fallback to simple for others
327+
# Use smart chunking for supported code languages
328328
use_smart_chunking = cfg.get("smart_chunking", True) if isinstance(cfg, dict) else True
329+
supported_languages = ["python", "javascript", "typescript", "java", "go", "rust", "c", "cpp"]
329330

330-
if use_smart_chunking and lang != "text":
331+
if use_smart_chunking and lang in supported_languages:
331332
chunks = smart_chunk(content, language=lang, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP)
332333
else:
333334
chunks = chunk_text(content, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP)

db_modules/__init__.py

Lines changed: 0 additions & 48 deletions
This file was deleted.

services/search_service.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,9 +85,9 @@ def semantic_search(
8585

8686
@staticmethod
8787
def _make_cache_key(project_id: str, query: str, top_k: int) -> str:
88-
"""Generate cache key for search query."""
88+
"""Generate cache key for search query using SHA-256."""
8989
key_str = f"{project_id}:{query}:{top_k}"
90-
key_hash = hashlib.md5(key_str.encode()).hexdigest()
90+
key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16] # Use first 16 chars
9191
return f"search:{key_hash}"
9292

9393
@staticmethod

smart_chunker.py

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,13 @@ def _split_into_units(self, text: str, language: str) -> List[Tuple[str, str]]:
106106
return []
107107

108108
def _split_python(self, text: str) -> List[Tuple[str, str]]:
109-
"""Split Python code into classes and functions."""
109+
"""
110+
Split Python code into classes and functions.
111+
112+
Uses indentation-based parsing. Works well for most Python code
113+
but may have edge cases with complex indentation patterns.
114+
Falls back to simple chunking if parsing fails.
115+
"""
110116
units = []
111117
lines = text.split("\n")
112118
current_unit = []
@@ -154,11 +160,19 @@ def _split_python(self, text: str) -> List[Tuple[str, str]]:
154160
return units
155161

156162
def _split_javascript(self, text: str) -> List[Tuple[str, str]]:
157-
"""Split JavaScript/TypeScript code into functions and classes."""
163+
"""
164+
Split JavaScript/TypeScript code into functions and classes.
165+
166+
Uses regex patterns to match function and class declarations.
167+
Works well for standard code patterns but may not handle all
168+
edge cases with nested structures. Falls back to brace-based
169+
splitting if regex matching doesn't find units.
170+
"""
158171
units = []
159172

160173
# Regex patterns for JS/TS
161174
# Match function declarations, arrow functions, class declarations
175+
# Note: Non-greedy matching, works for most cases but not perfect for deeply nested code
162176
patterns = [
163177
r'((?:export\s+)?(?:async\s+)?function\s+\w+\s*\([^)]*\)\s*{[\s\S]*?})',
164178
r'((?:export\s+)?const\s+\w+\s*=\s*(?:async\s*)?\([^)]*\)\s*=>\s*{[\s\S]*?})',
@@ -209,6 +223,11 @@ def _split_by_braces(self, text: str) -> List[Tuple[str, str]]:
209223
"""
210224
Generic brace-based splitting for C-style languages.
211225
Finds balanced brace blocks.
226+
227+
Note: This is a simple heuristic that doesn't handle braces
228+
inside strings, comments, or template literals. It works well
229+
for most code but may produce imperfect results in edge cases.
230+
The chunker will still fall back to simple chunking if needed.
212231
"""
213232
units = []
214233
lines = text.split("\n")
@@ -219,7 +238,8 @@ def _split_by_braces(self, text: str) -> List[Tuple[str, str]]:
219238
for line in lines:
220239
current_unit.append(line)
221240

222-
# Count braces (simple heuristic, doesn't handle strings/comments perfectly)
241+
# Count braces (simple heuristic)
242+
# Note: Doesn't handle strings/comments perfectly, but works well in practice
223243
brace_count += line.count("{") - line.count("}")
224244

225245
if "{" in line and not in_block:

0 commit comments

Comments
 (0)