From f9160e078462cdb8779529b938a969a1761ebf8e Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Sun, 22 Jun 2025 21:20:48 +0100 Subject: [PATCH 01/13] Add TypeScriptParser class for parsing TypeScript code with tree-sitter --- codetide/parsers/typescript_parser.py | 508 ++++++++++++++++++++++++++ 1 file changed, 508 insertions(+) create mode 100644 codetide/parsers/typescript_parser.py diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py new file mode 100644 index 0000000..975929b --- /dev/null +++ b/codetide/parsers/typescript_parser.py @@ -0,0 +1,508 @@ +from .base_parser import BaseParser +from ..core.common import readFile +from ..core.models import ( + ImportStatement, CodeFileModel, ClassDefinition, ClassAttribute, + VariableDeclaration, FunctionDefinition, MethodDefinition, + FunctionSignature, Parameter, CodeBase, CodeReference +) + +from concurrent.futures import ThreadPoolExecutor +from typing import Optional, Union, List, Literal +from tree_sitter import Parser, Language, Node +import tree_sitter_typescript as tsts +from pydantic import model_validator +from pathlib import Path +import asyncio +import re + + +class TypeScriptParser(BaseParser): + """ + TypeScript-specific implementation of the BaseParser using tree-sitter. + """ + _tree_parser: Optional[Parser] = None + _filepath: Optional[Union[str, Path]] = None + + @property + def language(self) -> str: + return "typescript" + + @property + def extension(self) -> str: + return ".ts" + + @property + def filepath(self) -> Optional[Union[str, Path]]: + return self._filepath + + @filepath.setter + def filepath(self, filepath: Union[str, Path]): + if isinstance(filepath, str): + filepath = Path(filepath) + self._filepath = filepath + + @staticmethod + def import_statement_template(importSatement: ImportStatement) -> str: + statement = f"import {{ {importSatement.name} }} from '{importSatement.source}'" + if importSatement.source and not importSatement.name: + statement = f"import '{importSatement.source}'" + if importSatement.alias: + statement = f"import {{ {importSatement.name} as {importSatement.alias} }} from '{importSatement.source}'" + return statement + + @property + def tree_parser(self) -> Optional[Parser]: + return self._tree_parser + + @tree_parser.setter + def tree_parser(self, parser: Parser): + self._tree_parser = parser + + @model_validator(mode="after") + def init_tree_parser(self) -> "TypeScriptParser": + self._tree_parser = Parser(Language(tsts.language_typescript())) ### TODO check difference for typescript and typsecriptX + return self + + @staticmethod + def _get_content(code: bytes, node: Node, preserve_indentation: bool = False) -> str: + if not preserve_indentation: + return code[node.start_byte:node.end_byte].decode('utf-8') + line_start = node.start_byte + while line_start > 0 and code[line_start - 1] not in (10, 13): + line_start -= 1 + return code[line_start:node.end_byte].decode('utf-8') + + @staticmethod + def _skip_init_paths(file_path: Path) -> str: + file_path = str(file_path) + if "index" in file_path: + file_path = file_path.replace("\\index.ts", "") + file_path = file_path.replace("/index.ts", "") + return file_path + + def parse_code(self, code: bytes, file_path: Path): + tree = self.tree_parser.parse(code) + root_node = tree.root_node + codeFile = CodeFileModel( + file_path=str(file_path), + raw=self._get_content(code, root_node, preserve_indentation=True) + ) + self._process_node(root_node, code, codeFile) + return codeFile + + async def parse_file(self, file_path: Union[str, Path], root_path: Optional[Union[str, Path]] = None) -> CodeFileModel: + file_path = Path(file_path).absolute() + loop = asyncio.get_running_loop() + with ThreadPoolExecutor() as pool: + code = await loop.run_in_executor(pool, readFile, file_path, "rb") + if root_path is not None: + file_path = file_path.relative_to(Path(root_path)) + codeFile = await loop.run_in_executor(pool, self.parse_code, code, file_path) + return codeFile + + @classmethod + def _process_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): + for child in node.children: + if child.type == "import_statement": + cls._process_import_node(child, code, codeFile) + elif child.type == "class_declaration": + cls._process_class_node(child, code, codeFile) + elif child.type == "function_declaration": + cls._process_function_definition(child, code, codeFile) + elif child.type == "lexical_declaration": + cls._process_variable_declaration(child, code, codeFile) + elif child.type == "expression_statement": + cls._process_expression_statement(child, code, codeFile) + + @classmethod + def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): + source = None + name = None + alias = None + for child in node.children: + if child.type == "string": + source = cls._get_content(code, child).strip("'\"") + elif child.type == "import_clause": + for clause_child in child.children: + if clause_child.type == "named_imports": + for named_child in clause_child.children: + if named_child.type == "import_specifier": + name = None + alias = None + for spec_child in named_child.children: + if spec_child.type == "identifier" and name is None: + name = cls._get_content(code, spec_child) + elif spec_child.type == "identifier": + alias = cls._get_content(code, spec_child) + importStatement = ImportStatement( + source=source, + name=name, + alias=alias + ) + codeFile.add_import(importStatement) + cls._generate_unique_import_id(codeFile.imports[-1]) + elif clause_child.type == "identifier": + name = cls._get_content(code, clause_child) + importStatement = ImportStatement( + source=source, + name=name + ) + codeFile.add_import(importStatement) + cls._generate_unique_import_id(codeFile.imports[-1]) + + @classmethod + def _process_class_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): + class_name = None + bases = [] + raw = cls._get_content(code, node, preserve_indentation=True) + for child in node.children: + if child.type == "identifier" and class_name is None: + class_name = cls._get_content(code, child) + elif child.type == "heritage_clause": + for base_child in child.children: + if base_child.type == "expression_with_type_arguments": + for expr_child in base_child.children: + if expr_child.type == "identifier": + bases.append(cls._get_content(code, expr_child)) + elif child.type == "class_body": + class_def = ClassDefinition( + name=class_name, + bases=bases, + raw=raw + ) + codeFile.add_class(class_def) + cls._process_class_body(child, code, codeFile) + + @classmethod + def _process_class_body(cls, node: Node, code: bytes, codeFile: CodeFileModel): + for child in node.children: + if child.type == "method_definition": + cls._process_method_definition(child, code, codeFile) + elif child.type == "public_field_definition": + cls._process_class_attribute(child, code, codeFile) + + @classmethod + def _process_method_definition(cls, node: Node, code: bytes, codeFile: CodeFileModel): + method_name = None + signature = FunctionSignature() + modifiers = [] + decorators = [] + raw = cls._get_content(code, node, preserve_indentation=True) + for child in node.children: + if child.type == "property_identifier" and method_name is None: + method_name = cls._get_content(code, child) + elif child.type == "formal_parameters": + signature.parameters = cls._process_parameters(child, code) + elif child.type == "type_annotation": + signature.return_type = cls._get_content(code, child) + elif child.type == "decorator": + decorators.append(cls._get_content(code, child)) + elif child.type == "public": + modifiers.append("public") + elif child.type == "private": + modifiers.append("private") + elif child.type == "protected": + modifiers.append("protected") + elif child.type == "static": + modifiers.append("static") + elif child.type == "async": + modifiers.append("async") + codeFile.classes[-1].add_method(MethodDefinition( + name=method_name, + signature=signature, + decorators=decorators, + modifiers=modifiers, + raw=raw + )) + + @classmethod + def _process_class_attribute(cls, node: Node, code: bytes, codeFile: CodeFileModel): + attribute = None + type_hint = None + value = None + raw = cls._get_content(code, node, preserve_indentation=True) + for child in node.children: + if child.type == "property_identifier" and attribute is None: + attribute = cls._get_content(code, child) + elif child.type == "type_annotation": + type_hint = cls._get_content(code, child) + elif child.type == "assignment_expression": + for assign_child in child.children: + if assign_child.type == "expression": + value = cls._get_content(code, assign_child) + codeFile.classes[-1].add_attribute(ClassAttribute( + name=attribute, + type_hint=type_hint, + value=value, + raw=raw + )) + + @classmethod + def _process_function_definition(cls, node: Node, code: bytes, codeFile: CodeFileModel): + definition = None + signature = FunctionSignature() + modifiers = [] + decorators = [] + raw = cls._get_content(code, node, preserve_indentation=True) + for child in node.children: + if child.type == "identifier" and definition is None: + definition = cls._get_content(code, child) + elif child.type == "formal_parameters": + signature.parameters = cls._process_parameters(child, code) + elif child.type == "type_annotation": + signature.return_type = cls._get_content(code, child) + elif child.type == "async": + modifiers.append("async") + codeFile.add_function(FunctionDefinition( + name=definition, + signature=signature, + decorators=decorators, + modifiers=modifiers, + raw=raw + )) + + @classmethod + def _process_variable_declaration(cls, node: Node, code: bytes, codeFile: CodeFileModel): + for child in node.children: + if child.type == "variable_declarator": + cls._process_variable_declarator(child, code, codeFile) + + @classmethod + def _process_variable_declarator(cls, node: Node, code: bytes, codeFile: CodeFileModel): + name = None + type_hint = None + value = None + raw = cls._get_content(code, node, preserve_indentation=True) + for child in node.children: + if child.type == "identifier" and name is None: + name = cls._get_content(code, child) + elif child.type == "type_annotation": + type_hint = cls._get_content(code, child) + elif child.type == "expression": + value = cls._get_content(code, child) + codeFile.add_variable(VariableDeclaration( + name=name, + type_hint=type_hint, + value=value, + raw=raw + )) + + @classmethod + def _process_expression_statement(cls, node: Node, code: bytes, codeFile: CodeFileModel): + # TypeScript expression statements can be variable assignments, function calls, etc. + # For now, we do not extract anything here. + pass + + @classmethod + def _process_parameters(cls, node: Node, code: bytes) -> List[Parameter]: + parameters = [] + for child in node.children: + if child.type == "required_parameter" or child.type == "optional_parameter": + param = cls._process_type_parameter(child, code) + if param is not None: + parameters.append(param) + return parameters + + @classmethod + def _process_type_parameter(cls, node: Node, code: bytes) -> Parameter: + parameter = None + type_hint = None + default = None + for child in node.children: + if child.type == "identifier" and parameter is None: + parameter = cls._get_content(code, child) + elif child.type == "type_annotation": + type_hint = cls._get_content(code, child) + elif child.type == "assignment_expression": + for assign_child in child.children: + if assign_child.type == "expression": + default = cls._get_content(code, assign_child) + if parameter: + return Parameter( + name=parameter, + type_hint=type_hint, + default_value=default + ) + + @classmethod + def _default_unique_import_id(cls, importModel: ImportStatement) -> str: + if importModel.source and importModel.name: + unique_id = f"{importModel.source}.{importModel.name}" + else: + unique_id = f"{importModel.source or importModel.name}" + unique_id = cls._skip_init_paths(unique_id) + return unique_id + + @classmethod + def _generate_unique_import_id(cls, importModel: ImportStatement): + unique_id = cls._default_unique_import_id(importModel) + if "index" in importModel.file_path: + importModel.definition_id = unique_id + importModel.unique_id = ".".join([ + entry for entry in unique_id.split(".") + if entry in importModel.file_path or entry in [importModel.name, importModel.source] + ]) + else: + importModel.unique_id = unique_id + importModel.definition_id = unique_id + importModel.raw = cls.import_statement_template(importModel) + + @classmethod + def resolve_inter_files_dependencies(cls, codeBase: CodeBase, codeFiles: Optional[List[CodeFileModel]] = None) -> None: + if codeFiles is None: + codeFiles = codeBase.root + all_imports = codeBase.all_imports() + all_elements = codeBase.all_classes() + codeBase.all_functions() + codeBase.all_variables() + for codeFile in codeFiles: + global_imports_minus_current = [ + importId for importId in all_imports + if importId not in codeFile.all_imports() + ] + for importStatement in codeFile.imports: + definitionId = importStatement.definition_id + if definitionId not in all_elements: + if definitionId in global_imports_minus_current: + matchingImport = codeBase.get_import(definitionId) + importStatement.definition_id = matchingImport.definition_id + continue + importStatement.definition_id = None + importStatement.unique_id = cls._default_unique_import_id(importStatement) + + @staticmethod + def count_occurences_in_code(code: str, substring: str) -> int: + pattern = r"(? None: + for codeFile in codeFiles: + if not codeFile.file_path.endswith(self.extension): + continue + non_import_ids = codeFile.all_classes() + codeFile.all_functions() + codeFile.all_variables() + raw_contents = codeFile.list_raw_contents + raw_contents_str = "\n".join(raw_contents) + for importStatement in codeFile.imports: + importAsDependency = importStatement.as_dependency + importCounts = self.count_occurences_in_code(raw_contents_str, importAsDependency) + if not importCounts: + continue + self._find_references( + non_import_ids=non_import_ids, + raw_contents=raw_contents, + matches_count=importCounts, + codeFile=codeFile, + unique_id=importStatement.unique_id, + reference_name=importAsDependency + ) + for elemen_type in ["variables", "functions", "classes"]: + self._find_elements_references( + element_type=elemen_type, + non_import_ids=non_import_ids, + raw_contents=raw_contents, + codeFile=codeFile + ) + + @classmethod + def _find_elements_references(cls, + element_type: Literal["variables", "functions", "classes"], + non_import_ids: List[str], + raw_contents: List[str], + codeFile: CodeFileModel): + for element in getattr(codeFile, element_type): + if element_type == "classes": + for classAttribute in element.attributes: + elementCounts = cls._get_element_count(raw_contents, classAttribute) + if elementCounts <= 0: + continue + cls._find_references( + non_import_ids=non_import_ids, + raw_contents=raw_contents, + matches_count=elementCounts, + codeFile=codeFile, + unique_id=classAttribute.unique_id, + reference_name=classAttribute.name + ) + for classMethod in element.methods: + elementCounts = cls._get_element_count(raw_contents, classMethod) + if elementCounts <= 0: + continue + cls._find_references( + non_import_ids=non_import_ids, + raw_contents=raw_contents, + matches_count=elementCounts, + codeFile=codeFile, + unique_id=classMethod.unique_id, + reference_name=classMethod.name + ) + else: + elementCounts = cls._get_element_count(raw_contents, element) + if elementCounts <= 0: + continue + cls._find_references( + non_import_ids=non_import_ids, + raw_contents=raw_contents, + matches_count=elementCounts, + codeFile=codeFile, + unique_id=element.unique_id, + reference_name=element.name + ) + + @classmethod + def _get_element_count(cls, raw_contents: List[str], element): + elementCounts = cls.count_occurences_in_code("\n".join(raw_contents), element.name) + elementCounts -= 1 + return elementCounts + + @staticmethod + def _find_references( + non_import_ids: List[str], + raw_contents: List[str], + matches_count: int, + codeFile: CodeFileModel, + unique_id: str, + reference_name: str): + matches_found = 0 + for _id, raw_content in zip(non_import_ids, raw_contents): + if reference_name in raw_content: + codeElement = codeFile.get(_id) + counts = 1 + if isinstance(codeElement, (VariableDeclaration, FunctionDefinition)): + codeElement.references.append( + CodeReference( + unique_id=unique_id, + name=reference_name + ) + ) + matches_found += counts + elif isinstance(codeElement, (ClassDefinition)): + for method in codeElement.methods: + if reference_name in method.raw: + method.references.append( + CodeReference( + unique_id=unique_id, + name=reference_name + ) + ) + matches_found += counts + if matches_found >= matches_count: + break + for attribute in codeElement.attributes: + if reference_name in attribute.raw: + attribute.references.append( + CodeReference( + unique_id=unique_id, + name=reference_name + ) + ) + matches_found += counts + if matches_found >= matches_count: + break + if reference_name in codeElement.bases: + codeElement.bases_references.append( + CodeReference( + unique_id=unique_id, + name=reference_name + ) + ) + if matches_found > matches_count: + break From af64621cb4fd19c91a091f3c5d1d6ada2579fdc9 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Sun, 22 Jun 2025 21:21:00 +0100 Subject: [PATCH 02/13] Add strict code completion policy guidelines to copilot instructions --- .github/copilot-instructions.md | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..e540141 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,45 @@ +--- +applyTo: "**" +--- + +# Strict Code Completion Policy – Full Implementation Required + +## 1. Pre-Generation Reflection +Before generating any code, **analyze the full context of the request**: +- Determine the size, complexity, and number of components or methods expected. +- If the context is large, **increase attention to detail and plan accordingly** to ensure nothing is missed. +- Be aware of dependencies, structure, and flow across files or modules. **No aspect should be left to assumption or deferred.** + +## 2. Completion is Mandatory – No Partial Code +- Every function, method, class, interface, and component MUST be **fully implemented from beginning to end**. +- Truncating or ending code halfway, especially within functions or classes, is **strictly prohibited**. +- Each logical block—loops, conditionals, try/catch, class methods, utility functions—must be written in full. +- You are expected to **write as if you are delivering final, production-quality code**. + +## 3. No Placeholders or Stubs Allowed +Under no circumstances should any of the following be used: +- `TODO`, `pass`, `...`, `throw new Error("Not implemented")`, or any other form of incomplete logic. +- Comments implying future work or intentions instead of actual implementation. +- Placeholder values, names, or logic unless explicitly instructed—and even then, implement fully functional behavior based on best assumptions. + +## 4. Long Context Handling +- When code exceeds the length of a single output block, **break it into clearly marked, complete sections**. +- For example, finish one full method or class section at a time and indicate continuation logically. +- Do **not** cut off code arbitrarily. It is better to split and label parts than to abandon logic halfway. +- Always preserve internal consistency, correct references, and complete flow across parts. + +## 5. Quality and Delivery Expectations +- Code must be clean, logically sound, and formatted according to established naming and error-handling standards. +- Error handling must be included where applicable—no skipped try/catch blocks. +- Comments should only be used to clarify assumptions, not to excuse incomplete logic. + +## 6. Responsibility and Consequences +- Partial or incomplete work is unacceptable and will be treated as a task failure. +- You are expected to take full ownership of code generation and go the extra mile to ensure all logic is correct, complete, and ready to run. +- Failure to deliver complete implementations will result in rejection and no reward. + +## 7. Summary +- Always plan before generating. +- Never leave any part of the code unfinished. +- Split intelligently when needed, but always deliver complete implementations. +- You are accountable for the full success of the generated code. From c078612f3be44d146e32fb2f1b7a0c5bf9798005 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Sun, 22 Jun 2025 22:42:16 +0100 Subject: [PATCH 03/13] Fix identifier type check in TypeScriptParser to use 'type_identifier' --- codetide/parsers/typescript_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index 975929b..c6b3a21 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -156,7 +156,7 @@ def _process_class_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): bases = [] raw = cls._get_content(code, node, preserve_indentation=True) for child in node.children: - if child.type == "identifier" and class_name is None: + if child.type == "type_identifier" and class_name is None: class_name = cls._get_content(code, child) elif child.type == "heritage_clause": for base_child in child.children: From d1c0cd1a2afc9cdda0cb7d3e6c949c868019f704 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Sun, 22 Jun 2025 23:42:32 +0100 Subject: [PATCH 04/13] Add unit tests for TypeScriptParser functionality and parsing logic --- tests/parsers/test_typescript_parser.py | 187 ++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 tests/parsers/test_typescript_parser.py diff --git a/tests/parsers/test_typescript_parser.py b/tests/parsers/test_typescript_parser.py new file mode 100644 index 0000000..0f62806 --- /dev/null +++ b/tests/parsers/test_typescript_parser.py @@ -0,0 +1,187 @@ +from codetide.core.models import ImportStatement +from codetide.parsers.typescript_parser import TypeScriptParser + +from tree_sitter import Parser +from pathlib import Path +import pytest +import os + +@pytest.fixture +def parser() -> TypeScriptParser: + """Provides a default instance of the TypeScriptParser.""" + return TypeScriptParser() + +class TestTypeScriptParser: + + def test_initialization(self, parser: TypeScriptParser): + """Tests the basic properties and initialization of the parser.""" + assert parser.language == "typescript" + assert parser.extension == ".ts" + assert parser.tree_parser is not None + assert isinstance(parser.tree_parser, Parser) + + @pytest.mark.parametrize("path, expected", [ + ("my/app/main.ts", "my/app/main.ts"), + ("my/app/index.ts", "my/app"), + ("my\\app\\index.ts", "my\\app"), + ("lib.ts", "lib.ts"), + ]) + def test_skip_init_paths(self, path, expected): + """Tests the removal of index.ts from paths.""" + assert TypeScriptParser._skip_init_paths(Path(path)) == str(Path(expected)) + + @pytest.mark.parametrize("code, substring, count", [ + ("import { A } from 'mod'; A.do();", "A", 2), + ("let var1 = 5; var1 = 6;", "var1", 2), + ("let variable = 1; var2 = 2;", "var1", 0), + ("function foo() {} foo();", "foo", 2), + ("test(test);", "test", 2), + ("class MyTest {}", "MyTest", 1), + ("a.b.c(b);", "b", 2), + ]) + def test_count_occurences_in_code(self, code, substring, count): + """Tests the regex-based word occurrence counter.""" + assert TypeScriptParser.count_occurences_in_code(code, substring) == count + + def test_get_content_indentation(self, parser: TypeScriptParser): + """Tests the _get_content method for preserving indentation.""" + code = b"class MyClass {\n myMethod() {\n return 1;\n }\n}" + codeFile = parser.parse_code(code, file_path="myMethod.ts") + print(f"{codeFile=}") + assert "myMethod" in codeFile.raw + assert codeFile.raw.startswith("class MyClass") + + @pytest.mark.asyncio + async def test_parse_file(self, parser: TypeScriptParser, tmp_path: Path): + """Tests parsing a file from disk.""" + file_path = tmp_path / "test_module.ts" + code_content = "import { A } from 'mod';\nlet x = 10;" + file_path.write_text(code_content, encoding="utf-8") + code_file_model = await parser.parse_file(file_path) + print(f"{code_file_model=}") + assert code_file_model.file_path == str(file_path.absolute()) + assert len(code_file_model.imports) == 1 + assert code_file_model.imports[0].source == "mod" + assert code_file_model.imports[0].name == "A" + assert len(code_file_model.variables) == 1 + assert code_file_model.variables[0].name == "x" + assert code_file_model.variables[0].value == "10" + + @pytest.mark.asyncio + async def test_parse_file_with_root_path(self, parser: TypeScriptParser, tmp_path: Path): + """Tests parsing a file with a root path to get a relative file path.""" + root_dir = tmp_path / "project" + root_dir.mkdir() + module_path = root_dir / "module" + module_path.mkdir() + file_path = module_path / "test.ts" + file_path.write_text("let x = 1;", encoding="utf-8") + code_file_model = await parser.parse_file(file_path, root_path=root_dir) + expected_relative_path = os.path.join("module", "test.ts") + assert code_file_model.file_path == expected_relative_path + +class TestTypeScriptParserDetailed: + + @pytest.mark.parametrize("code, expected_imports", [ + ("import { A } from 'mod';", [ImportStatement(source='mod', name='A')]), + ("import B from 'lib';", [ImportStatement(source='lib', name='B')]), + ("import { X as Y } from 'pkg';", [ImportStatement(source='pkg', name='X', alias='Y')]), + ("import 'side-effect';", [ImportStatement(source='side-effect')]), + ]) + def test_parse_imports(self, parser: TypeScriptParser, code, expected_imports): + """Tests various import statement formats.""" + file_path = Path("test.ts") + code_file = parser.parse_code(code.encode('utf-8'), file_path) + assert len(code_file.imports) == len(expected_imports) + for parsed, expected in zip(code_file.imports, expected_imports): + assert parsed.source == expected.source + assert parsed.name == expected.name + assert parsed.alias == expected.alias + + def test_parse_function(self, parser: TypeScriptParser): + """Tests parsing of a complex function definition.""" + code = """ +async function myFunc(a: number, b: string = \"default\"): Promise { + return [b]; +} +""" + file_path = Path("test.ts") + code_file = parser.parse_code(code.encode('utf-8'), file_path) + assert len(code_file.functions) == 1 + func = code_file.functions[0] + assert func.name == "myFunc" + assert "async" in func.modifiers + sig = func.signature + assert sig is not None + assert sig.return_type is not None + assert len(sig.parameters) == 2 + param1 = sig.parameters[0] + assert param1.name == "a" + assert param1.type_hint == "number" + assert param1.default_value is None + param2 = sig.parameters[1] + assert param2.name == "b" + assert param2.type_hint == "string" + assert param2.default_value == '"default"' + + def test_parse_class(self, parser: TypeScriptParser): + """Tests parsing of a complex class definition.""" + code = """ +class Child extends Base1 { + public classAttr: number = 10; + constructor(name: string) { + this.name = name; + } + get nameUpper(): string { + return this.name.toUpperCase(); + } +} +""" + file_path = Path("test.ts") + code_file = parser.parse_code(code.encode('utf-8'), file_path) + assert len(code_file.classes) == 1 + cls = code_file.classes[0] + assert cls.name == "Child" + assert "Base1" in cls.bases + assert len(cls.attributes) == 1 + attr = cls.attributes[0] + assert attr.name == "classAttr" + assert attr.type_hint == ": number" + assert attr.value == "10" + assert len(cls.methods) >= 2 + method1 = next(m for m in cls.methods if m.name == "constructor") + method2 = next(m for m in cls.methods if m.name == "nameUpper") + assert method1.name == "constructor" + assert len(method1.signature.parameters) == 1 + assert method2.name == "nameUpper" + assert method2.signature.return_type is not None + + def test_intra_file_dependencies(self, parser: TypeScriptParser): + """Tests resolving references within a single file.""" + code = """ +import { Helper } from './helper'; +class Helper { + doWork() { + return 'done'; + } +} +function processData(items: string[]): Helper { + const h = new Helper(); + h.doWork(); + return h; +} +let result = processData([]); +""" + file_path = Path("test.ts") + code_file = parser.parse_code(code.encode('utf-8'), file_path) + parser.resolve_intra_file_dependencies([code_file]) + process_func = code_file.get("test.processData") + assert process_func is not None + assert any(ref.name == "Helper" for ref in process_func.references) + assert any(ref.name == "doWork" for ref in process_func.references) + do_work_method = code_file.get("test.Helper.doWork") + found = any(ref.unique_id == do_work_method.unique_id for ref in process_func.references if do_work_method) + assert found or do_work_method is None + var_decl = code_file.get("test.result") + assert var_decl is not None + assert any(ref.unique_id == process_func.unique_id for ref in var_decl.references) From f7c0d31a7041e23d52b9c59c857a9aa5da8362d3 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Mon, 23 Jun 2025 18:04:31 +0100 Subject: [PATCH 05/13] Enhance import processing in TypeScriptParser by adding a dedicated method for handling import clauses --- codetide/parsers/typescript_parser.py | 70 +++++++++++++++------------ 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index c6b3a21..5be6421 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -114,41 +114,51 @@ def _process_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): elif child.type == "expression_statement": cls._process_expression_statement(child, code, codeFile) + @classmethod + def _process_import_clause_node(cls, node: Node, code: bytes)->str: + for child in node.children: + if child.type == "named_imports": + for import_child in child.children: + if import_child.type == "import_specifier": + name = cls._get_content(code, import_child) + return name + return + # print(f"{import_child.type=}, {cls._get_content(code, import_child)}") + # print("outside import clause") + @classmethod def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): source = None name = None - alias = None + # alias = None + # is_relative = False + next_is_from_import = False + next_is_import = False for child in node.children: - if child.type == "string": - source = cls._get_content(code, child).strip("'\"") - elif child.type == "import_clause": - for clause_child in child.children: - if clause_child.type == "named_imports": - for named_child in clause_child.children: - if named_child.type == "import_specifier": - name = None - alias = None - for spec_child in named_child.children: - if spec_child.type == "identifier" and name is None: - name = cls._get_content(code, spec_child) - elif spec_child.type == "identifier": - alias = cls._get_content(code, spec_child) - importStatement = ImportStatement( - source=source, - name=name, - alias=alias - ) - codeFile.add_import(importStatement) - cls._generate_unique_import_id(codeFile.imports[-1]) - elif clause_child.type == "identifier": - name = cls._get_content(code, clause_child) - importStatement = ImportStatement( - source=source, - name=name - ) - codeFile.add_import(importStatement) - cls._generate_unique_import_id(codeFile.imports[-1]) + print(f"{child.type=}, {cls._get_content(code, child)}") + if child.type == "import": + next_is_import = True + elif child.type == "import_clause" and next_is_import: + name = cls._process_import_clause_node(child, code) + next_is_import = False + elif child.type == "from": + next_is_from_import = True + elif child.type == "string" and next_is_from_import: + source = cls._get_content(code, child) + + if name and source is None: + source = name + name = None + + if source: + importStatement = ImportStatement( + source=source, + name=name + ) + print(f"{importStatement=}") + + codeFile.add_import(importStatement) + cls._generate_unique_import_id(codeFile.imports[-1]) @classmethod def _process_class_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): From c08c2337b5864bebcff57d1e8c165b1c9b2c355c Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 21:53:41 +0100 Subject: [PATCH 06/13] Fix variable parsing logic in TypeScriptParser to correctly handle assignment expressions --- codetide/parsers/typescript_parser.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index 5be6421..6090c85 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -282,14 +282,18 @@ def _process_variable_declarator(cls, node: Node, code: bytes, codeFile: CodeFil name = None type_hint = None value = None - raw = cls._get_content(code, node, preserve_indentation=True) + next_is_value = False + raw = cls._get_content(code, node) for child in node.children: if child.type == "identifier" and name is None: name = cls._get_content(code, child) elif child.type == "type_annotation": type_hint = cls._get_content(code, child) - elif child.type == "expression": + elif child.type == "=": + next_is_value = True + elif next_is_value: value = cls._get_content(code, child) + next_is_value = False codeFile.add_variable(VariableDeclaration( name=name, type_hint=type_hint, From def9d9230eb6fc64a9ced76b01e40258c795b1c6 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 21:54:22 +0100 Subject: [PATCH 07/13] Fix import source formatting in TypeScriptParser tests to include quotes --- tests/parsers/test_typescript_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/parsers/test_typescript_parser.py b/tests/parsers/test_typescript_parser.py index 0f62806..8194ca9 100644 --- a/tests/parsers/test_typescript_parser.py +++ b/tests/parsers/test_typescript_parser.py @@ -61,7 +61,7 @@ async def test_parse_file(self, parser: TypeScriptParser, tmp_path: Path): print(f"{code_file_model=}") assert code_file_model.file_path == str(file_path.absolute()) assert len(code_file_model.imports) == 1 - assert code_file_model.imports[0].source == "mod" + assert code_file_model.imports[0].source == "'mod'" assert code_file_model.imports[0].name == "A" assert len(code_file_model.variables) == 1 assert code_file_model.variables[0].name == "x" @@ -83,10 +83,10 @@ async def test_parse_file_with_root_path(self, parser: TypeScriptParser, tmp_pat class TestTypeScriptParserDetailed: @pytest.mark.parametrize("code, expected_imports", [ - ("import { A } from 'mod';", [ImportStatement(source='mod', name='A')]), - ("import B from 'lib';", [ImportStatement(source='lib', name='B')]), - ("import { X as Y } from 'pkg';", [ImportStatement(source='pkg', name='X', alias='Y')]), - ("import 'side-effect';", [ImportStatement(source='side-effect')]), + ("import { A } from 'mod';", [ImportStatement(source="'mod'", name='A')]), + ("import B from 'lib';", [ImportStatement(source="'lib'", name='B')]), + ("import { X as Y } from 'pkg';", [ImportStatement(source="'pkg'", name='X', alias='Y')]), + ("import 'side-effect';", [ImportStatement(source="'side-effect'")]), ]) def test_parse_imports(self, parser: TypeScriptParser, code, expected_imports): """Tests various import statement formats.""" From 8aa52d894f96a20d6ba47165c9ffe47ec0b4e36f Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 22:08:38 +0100 Subject: [PATCH 08/13] Refactor import clause processing in TypeScriptParser to return both name and alias --- codetide/parsers/typescript_parser.py | 36 ++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index 6090c85..06a699e 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -6,8 +6,8 @@ FunctionSignature, Parameter, CodeBase, CodeReference ) +from typing import Optional, Tuple, Union, List, Literal from concurrent.futures import ThreadPoolExecutor -from typing import Optional, Union, List, Literal from tree_sitter import Parser, Language, Node import tree_sitter_typescript as tsts from pydantic import model_validator @@ -115,22 +115,39 @@ def _process_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): cls._process_expression_statement(child, code, codeFile) @classmethod - def _process_import_clause_node(cls, node: Node, code: bytes)->str: + def _process_import_clause_node(cls, node: Node, code: bytes)->Tuple[Optional[str], Optional[str]]: + alias = None + next_is_alias = False for child in node.children: if child.type == "named_imports": for import_child in child.children: if import_child.type == "import_specifier": + for alias_child in import_child.children: + if alias_child.type == "identifier" and not next_is_alias: + name = cls._get_content(code, alias_child) + elif alias_child.type == "as": + next_is_alias = True + elif alias_child.type == "identifier" and next_is_alias: + alias = cls._get_content(code, alias_child) + next_is_alias = False + + if name and alias: + return name, alias + name = cls._get_content(code, import_child) - return name - return - # print(f"{import_child.type=}, {cls._get_content(code, import_child)}") - # print("outside import clause") + return name, alias + + elif child.type == "identifier": + name = cls._get_content(code, child) + return name, alias + + return None, None @classmethod def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): source = None name = None - # alias = None + alias = None # is_relative = False next_is_from_import = False next_is_import = False @@ -139,7 +156,7 @@ def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): if child.type == "import": next_is_import = True elif child.type == "import_clause" and next_is_import: - name = cls._process_import_clause_node(child, code) + name, alias = cls._process_import_clause_node(child, code) next_is_import = False elif child.type == "from": next_is_from_import = True @@ -153,7 +170,8 @@ def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): if source: importStatement = ImportStatement( source=source, - name=name + name=name, + alias=alias ) print(f"{importStatement=}") From 55ed8b3aaea8569fcb05be789330cb3cc6ac41b9 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 22:15:12 +0100 Subject: [PATCH 09/13] Enhance import clause processing in TypeScriptParser to handle missing source nodes --- codetide/parsers/typescript_parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index 06a699e..bf8fb6a 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -158,6 +158,9 @@ def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): elif child.type == "import_clause" and next_is_import: name, alias = cls._process_import_clause_node(child, code) next_is_import = False + elif next_is_import: + source = cls._get_content(code, child) + next_is_import = False elif child.type == "from": next_is_from_import = True elif child.type == "string" and next_is_from_import: From 15ff7b062f521e294828ef09d9645125e51b9b56 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 22:22:59 +0100 Subject: [PATCH 10/13] Enhance TypeScriptParser to include debug prints and improve type parameter processing --- codetide/parsers/typescript_parser.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index bf8fb6a..45cb6d4 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -152,7 +152,6 @@ def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): next_is_from_import = False next_is_import = False for child in node.children: - print(f"{child.type=}, {cls._get_content(code, child)}") if child.type == "import": next_is_import = True elif child.type == "import_clause" and next_is_import: @@ -276,6 +275,7 @@ def _process_function_definition(cls, node: Node, code: bytes, codeFile: CodeFil decorators = [] raw = cls._get_content(code, node, preserve_indentation=True) for child in node.children: + print(f"{child.type=}, {cls._get_content(code, child)}") if child.type == "identifier" and definition is None: definition = cls._get_content(code, child) elif child.type == "formal_parameters": @@ -342,12 +342,24 @@ def _process_parameters(cls, node: Node, code: bytes) -> List[Parameter]: def _process_type_parameter(cls, node: Node, code: bytes) -> Parameter: parameter = None type_hint = None - default = None + default = None + next_is_assignment = False for child in node.children: if child.type == "identifier" and parameter is None: parameter = cls._get_content(code, child) elif child.type == "type_annotation": - type_hint = cls._get_content(code, child) + next_is_type = False + for type_child in child.children: + if type_child.type == ":": + next_is_type = True + elif next_is_type: + type_hint = cls._get_content(code, type_child) + next_is_type = False + elif child.type == "=": + next_is_assignment = True + elif next_is_assignment: + default = cls._get_content(code, child) + next_is_assignment = False elif child.type == "assignment_expression": for assign_child in child.children: if assign_child.type == "expression": From 5822b58f3e494443aa85b1f2797dacf1cf6e5462 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 22:55:19 +0100 Subject: [PATCH 11/13] Refactor TypeScriptParser to streamline content retrieval and enhance method processing --- codetide/parsers/typescript_parser.py | 104 +++++++++++++------------- 1 file changed, 51 insertions(+), 53 deletions(-) diff --git a/codetide/parsers/typescript_parser.py b/codetide/parsers/typescript_parser.py index 45cb6d4..ace7863 100644 --- a/codetide/parsers/typescript_parser.py +++ b/codetide/parsers/typescript_parser.py @@ -85,7 +85,7 @@ def parse_code(self, code: bytes, file_path: Path): root_node = tree.root_node codeFile = CodeFileModel( file_path=str(file_path), - raw=self._get_content(code, root_node, preserve_indentation=True) + raw=self._get_content(code, root_node) ) self._process_node(root_node, code, codeFile) return codeFile @@ -175,7 +175,6 @@ def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): name=name, alias=alias ) - print(f"{importStatement=}") codeFile.add_import(importStatement) cls._generate_unique_import_id(codeFile.imports[-1]) @@ -184,13 +183,13 @@ def _process_import_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): def _process_class_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): class_name = None bases = [] - raw = cls._get_content(code, node, preserve_indentation=True) + raw = cls._get_content(code, node) for child in node.children: if child.type == "type_identifier" and class_name is None: class_name = cls._get_content(code, child) - elif child.type == "heritage_clause": + elif child.type == "class_heritage": for base_child in child.children: - if base_child.type == "expression_with_type_arguments": + if base_child.type == "extends_clause": for expr_child in base_child.children: if expr_child.type == "identifier": bases.append(cls._get_content(code, expr_child)) @@ -207,55 +206,30 @@ def _process_class_node(cls, node: Node, code: bytes, codeFile: CodeFileModel): def _process_class_body(cls, node: Node, code: bytes, codeFile: CodeFileModel): for child in node.children: if child.type == "method_definition": - cls._process_method_definition(child, code, codeFile) + cls._process_function_definition(child, code, codeFile, is_method=True) elif child.type == "public_field_definition": cls._process_class_attribute(child, code, codeFile) - @classmethod - def _process_method_definition(cls, node: Node, code: bytes, codeFile: CodeFileModel): - method_name = None - signature = FunctionSignature() - modifiers = [] - decorators = [] - raw = cls._get_content(code, node, preserve_indentation=True) - for child in node.children: - if child.type == "property_identifier" and method_name is None: - method_name = cls._get_content(code, child) - elif child.type == "formal_parameters": - signature.parameters = cls._process_parameters(child, code) - elif child.type == "type_annotation": - signature.return_type = cls._get_content(code, child) - elif child.type == "decorator": - decorators.append(cls._get_content(code, child)) - elif child.type == "public": - modifiers.append("public") - elif child.type == "private": - modifiers.append("private") - elif child.type == "protected": - modifiers.append("protected") - elif child.type == "static": - modifiers.append("static") - elif child.type == "async": - modifiers.append("async") - codeFile.classes[-1].add_method(MethodDefinition( - name=method_name, - signature=signature, - decorators=decorators, - modifiers=modifiers, - raw=raw - )) - @classmethod def _process_class_attribute(cls, node: Node, code: bytes, codeFile: CodeFileModel): attribute = None type_hint = None value = None - raw = cls._get_content(code, node, preserve_indentation=True) + modifiers = [] + next_is_assignment = False + raw = cls._get_content(code, node) for child in node.children: if child.type == "property_identifier" and attribute is None: attribute = cls._get_content(code, child) elif child.type == "type_annotation": - type_hint = cls._get_content(code, child) + type_hint = cls._get_content(code, child).replace(": ", "") + elif child.type == "accessibility_modifier": + modifiers.append(cls._get_content(code, child)) + elif child.type == "=": + next_is_assignment = True + elif next_is_assignment: + value = cls._get_content(code, child) + next_is_assignment = False elif child.type == "assignment_expression": for assign_child in child.children: if assign_child.type == "expression": @@ -263,34 +237,58 @@ def _process_class_attribute(cls, node: Node, code: bytes, codeFile: CodeFileMod codeFile.classes[-1].add_attribute(ClassAttribute( name=attribute, type_hint=type_hint, + modifiers=modifiers, value=value, raw=raw )) @classmethod - def _process_function_definition(cls, node: Node, code: bytes, codeFile: CodeFileModel): + def _process_function_definition(cls, node: Node, code: bytes, codeFile: CodeFileModel, is_method :bool=False): definition = None signature = FunctionSignature() modifiers = [] decorators = [] - raw = cls._get_content(code, node, preserve_indentation=True) + raw = cls._get_content(code, node) for child in node.children: - print(f"{child.type=}, {cls._get_content(code, child)}") if child.type == "identifier" and definition is None: definition = cls._get_content(code, child) + elif child.type == "property_identifier" and definition is None: + definition = cls._get_content(code, child) elif child.type == "formal_parameters": signature.parameters = cls._process_parameters(child, code) elif child.type == "type_annotation": - signature.return_type = cls._get_content(code, child) + signature.return_type = cls._get_content(code, child).replace(": ", "") elif child.type == "async": modifiers.append("async") - codeFile.add_function(FunctionDefinition( - name=definition, - signature=signature, - decorators=decorators, - modifiers=modifiers, - raw=raw - )) + elif child.type == "decorator": + decorators.append(cls._get_content(code, child)) + elif child.type == "public": + modifiers.append("public") + elif child.type == "private": + modifiers.append("private") + elif child.type == "protected": + modifiers.append("protected") + elif child.type == "static": + modifiers.append("static") + elif child.type == "async": + modifiers.append("async") + if not is_method: + codeFile.add_function(FunctionDefinition( + name=definition, + signature=signature, + decorators=decorators, + modifiers=modifiers, + raw=raw + )) + else: + codeFile.classes[-1].add_method(MethodDefinition( + name=definition, + signature=signature, + decorators=decorators, + modifiers=modifiers, + raw=raw + )) + @classmethod def _process_variable_declaration(cls, node: Node, code: bytes, codeFile: CodeFileModel): From e13c46abfa5943451402e544a429fd6079366ddf Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 22:55:31 +0100 Subject: [PATCH 12/13] Fix type hint assertion in Child class test to remove leading colon --- tests/parsers/test_typescript_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parsers/test_typescript_parser.py b/tests/parsers/test_typescript_parser.py index 8194ca9..8d21342 100644 --- a/tests/parsers/test_typescript_parser.py +++ b/tests/parsers/test_typescript_parser.py @@ -146,7 +146,7 @@ class Child extends Base1 { assert len(cls.attributes) == 1 attr = cls.attributes[0] assert attr.name == "classAttr" - assert attr.type_hint == ": number" + assert attr.type_hint == "number" assert attr.value == "10" assert len(cls.methods) >= 2 method1 = next(m for m in cls.methods if m.name == "constructor") From 1bfaa3f4208b03f18402345f58ecbb948b253c99 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 25 Jun 2025 22:58:16 +0100 Subject: [PATCH 13/13] Add tree-sitter-typescript dependency to requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5cf0baa..953e4f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ pydantic==2.10.3 pygit2==1.18.0 pyyaml==6.0.2 tree-sitter==0.24.0 -tree-sitter-python==0.23.6 \ No newline at end of file +tree-sitter-python==0.23.6 +tree-sitter-typescript==0.23.2 \ No newline at end of file