@@ -106,7 +106,13 @@ def _split_into_units(self, text: str, language: str) -> List[Tuple[str, str]]:
106106 return []
107107
108108 def _split_python (self , text : str ) -> List [Tuple [str , str ]]:
109- """Split Python code into classes and functions."""
109+ """
110+ Split Python code into classes and functions.
111+
112+ Uses indentation-based parsing. Works well for most Python code
113+ but may have edge cases with complex indentation patterns.
114+ Falls back to simple chunking if parsing fails.
115+ """
110116 units = []
111117 lines = text .split ("\n " )
112118 current_unit = []
@@ -154,11 +160,19 @@ def _split_python(self, text: str) -> List[Tuple[str, str]]:
154160 return units
155161
156162 def _split_javascript (self , text : str ) -> List [Tuple [str , str ]]:
157- """Split JavaScript/TypeScript code into functions and classes."""
163+ """
164+ Split JavaScript/TypeScript code into functions and classes.
165+
166+ Uses regex patterns to match function and class declarations.
167+ Works well for standard code patterns but may not handle all
168+ edge cases with nested structures. Falls back to brace-based
169+ splitting if regex matching doesn't find units.
170+ """
158171 units = []
159172
160173 # Regex patterns for JS/TS
161174 # Match function declarations, arrow functions, class declarations
175+ # Note: the non-greedy `[\s\S]*?}` ends each match at the first `}`, so a body containing nested braces is cut short
162176 patterns = [
163177 r'((?:export\s+)?(?:async\s+)?function\s+\w+\s*\([^)]*\)\s*{[\s\S]*?})' ,
164178 r'((?:export\s+)?const\s+\w+\s*=\s*(?:async\s*)?\([^)]*\)\s*=>\s*{[\s\S]*?})' ,
@@ -209,6 +223,11 @@ def _split_by_braces(self, text: str) -> List[Tuple[str, str]]:
209223 """
210224 Generic brace-based splitting for C-style languages.
211225 Finds balanced brace blocks.
226+
227+ Note: This is a simple heuristic that doesn't handle braces
228+ inside strings, comments, or template literals. It works well
229+ for most code but may produce imperfect results in edge cases.
230+ The chunker will still fall back to simple chunking if needed.
212231 """
213232 units = []
214233 lines = text .split ("\n " )
@@ -219,7 +238,8 @@ def _split_by_braces(self, text: str) -> List[Tuple[str, str]]:
219238 for line in lines :
220239 current_unit .append (line )
221240
222- # Count braces (simple heuristic, doesn't handle strings/comments perfectly)
241+ # Count braces (simple heuristic)
242+ # Note: braces inside string literals or comments are counted too, so the depth can drift on such lines
223243 brace_count += line .count ("{" ) - line .count ("}" )
224244
225245 if "{" in line and not in_block :
0 commit comments