diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..a3a0bb1 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,147 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = %(here)s/alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. for multiple paths, the path separator +# is defined by "path_separator" below. +prepend_sys_path = . + + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the tzdata library which can be installed by adding +# `alembic[tz]` to the pip requirements. +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to /versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "path_separator" +# below. +# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions + +# path_separator; This indicates what character is used to split lists of file +# paths, including version_locations and prepend_sys_path within configparser +# files such as alembic.ini. +# The default rendered in new alembic.ini files is "os", which uses os.pathsep +# to provide os-dependent path splitting. +# +# Note that in order to support legacy alembic.ini files, this default does NOT +# take place if path_separator is not present in alembic.ini. If this +# option is omitted entirely, fallback logic is as follows: +# +# 1. Parsing of the version_locations option falls back to using the legacy +# "version_path_separator" key, which if absent then falls back to the legacy +# behavior of splitting on spaces and/or commas. +# 2. Parsing of the prepend_sys_path option falls back to the legacy +# behavior of splitting on spaces, commas, or colons. +# +# Valid values for path_separator are: +# +# path_separator = : +# path_separator = ; +# path_separator = space +# path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# database URL. This is consumed by the user-maintained env.py script only. 
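+# For example, a PostgreSQL URL would look like (illustrative only):
+#   sqlalchemy.url = postgresql+psycopg2://user:pass@localhost/codesage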
+# other means of configuring database URLs may be customized within the env.py +# file. +sqlalchemy.url = sqlite:///codesage.db + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# hooks = ruff +# ruff.type = module +# ruff.module = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Alternatively, use the exec runner to execute a binary found on your PATH +# hooks = ruff +# ruff.type = exec +# ruff.executable = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration. This is also consumed by the user-maintained +# env.py script only. +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..9cb3808 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,85 @@ +from logging.config import fileConfig + +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context +import os +import sys + +# Add project root to path +sys.path.insert(0, os.getcwd()) + +from codesage.history.models import Base + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. 
+ + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..1101630 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/codesage/cli/commands/scan.py b/codesage/cli/commands/scan.py index 60b7296..e59d1e6 100644 --- a/codesage/cli/commands/scan.py +++ b/codesage/cli/commands/scan.py @@ -7,8 +7,11 @@ from codesage.semantic_digest.python_snapshot_builder import PythonSemanticSnapshotBuilder, SnapshotConfig from codesage.semantic_digest.go_snapshot_builder import GoSemanticSnapshotBuilder from codesage.semantic_digest.shell_snapshot_builder import ShellSemanticSnapshotBuilder -from codesage.snapshot.models import ProjectSnapshot +from codesage.snapshot.models import ProjectSnapshot, Issue, IssueLocation from codesage.reporters import ConsoleReporter, JsonReporter, GitHubPRReporter +from codesage.cli.plugin_loader import PluginManager +from codesage.history.store import StorageEngine +from codesage.core.interfaces import CodeIssue def get_builder(language: str, path: Path): config = SnapshotConfig() @@ -28,11 +31,25 @@ def get_builder(language: str, path: Path): @click.option('--output', '-o', help='Output path for JSON reporter.') @click.option('--fail-on-high', is_flag=True, help='Exit with non-zero code if high severity issues are found.') @click.option('--ci-mode', is_flag=True, help='Enable CI mode (auto-detect GitHub environment).') +@click.option('--plugins-dir', default='.codesage/plugins', help='Directory containing plugins.') +@click.option('--db-url', default='sqlite:///codesage.db', help='Database URL for storage.') @click.pass_context -def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode): +def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode, plugins_dir, db_url): """ Scan the codebase and report issues. """ + # 1. Initialize Database + try: + storage = StorageEngine(db_url) + click.echo(f"Connected to storage: {db_url}") + except Exception as e: + click.echo(f"Warning: Could not connect to storage: {e}", err=True) + storage = None + + # 2. 
Load Plugins + plugin_manager = PluginManager(plugins_dir) + plugin_manager.load_plugins() + click.echo(f"Scanning {path} for {language}...") root_path = Path(path) @@ -44,6 +61,57 @@ def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode): try: snapshot: ProjectSnapshot = builder.build() + + # 3. Apply Custom Rules (Plugins) + for rule in plugin_manager.rules: + for file_path, file_snapshot in snapshot.files.items(): + try: + content = "" + full_path = root_path / file_path + if full_path.exists(): + content = full_path.read_text(errors='ignore') + + issues = rule.check(str(file_path), content, {}) + if issues: + for i in issues: + # Convert plugin CodeIssue to standard Issue model + + # Map severity to Issue severity Literal + severity = "warning" + if i.severity.lower() in ["info", "warning", "error"]: + severity = i.severity.lower() + elif i.severity.lower() == "high": + severity = "error" + elif i.severity.lower() == "low": + severity = "info" + + new_issue = Issue( + rule_id=rule.id, + severity=severity, + message=i.description, + location=IssueLocation( + file_path=str(file_path), + line=i.line_number + ), + symbol=None, + tags=["custom-rule"] + ) + + if file_snapshot.issues is None: + file_snapshot.issues = [] + file_snapshot.issues.append(new_issue) + + except Exception as e: + click.echo(f"Error running rule {rule.id} on {file_path}: {e}", err=True) + + # 4. Save to Storage + if storage: + try: + storage.save_snapshot(snapshot.metadata.project_name, snapshot) + click.echo("Snapshot saved to database.") + except Exception as e: + click.echo(f"Failed to save snapshot: {e}", err=True) + except Exception as e: click.echo(f"Scan failed: {e}", err=True) ctx.exit(1) diff --git a/codesage/cli/plugin_loader.py b/codesage/cli/plugin_loader.py index a62c0d6..5277521 100644 --- a/codesage/cli/plugin_loader.py +++ b/codesage/cli/plugin_loader.py @@ -1,50 +1,77 @@ import importlib.util import inspect import os +import sys +import logging from pathlib import Path -import click - -def load_plugins(cli_group, plugins_dir=".codesage/plugins"): - """ - Dynamically loads plugins from the specified directory. 
-    """
-    plugins_path = Path(plugins_dir)
-    if not plugins_path.exists():
-        return
-
-    for plugin_file in plugins_path.glob("*.py"):
-        try:
-            spec = importlib.util.spec_from_file_location(plugin_file.stem, plugin_file)
-            plugin_module = importlib.util.module_from_spec(spec)
-            spec.loader.exec_module(plugin_module)
-
-            if hasattr(plugin_module, 'register_command') and callable(plugin_module.register_command):
-                plugin_module.register_command(cli_group)
-            else:
-                click.echo(f"Warning: Plugin {plugin_file.name} does not have a 'register_command' function.", err=True)
-        except Exception as e:
-            click.echo(f"Warning: Could not load plugin {plugin_file.name}: {e}", err=True)
-
-if __name__ == '__main__':
-    @click.group()
-    def cli():
-        pass
-
-    # To test, create a dummy plugin file in .codesage/plugins
-    # e.g., .codesage/plugins/my_plugin.py
-    # import click
-    # def register_command(cli_group):
-    #     @cli_group.command()
-    #     def hello():
-    #         click.echo("Hello from plugin!")
-
-    os.makedirs(".codesage/plugins", exist_ok=True)
-    with open(".codesage/plugins/my_plugin.py", "w") as f:
-        f.write("import click\n")
-        f.write("def register_command(cli_group):\n")
-        f.write("    @cli_group.command()\n")
-        f.write("    def hello():\n")
-        f.write("        click.echo('Hello from plugin!')\n")
-
-    load_plugins(cli)
-    cli()
+from typing import List
+
+from codesage.core.interfaces import Plugin, Rule, Analyzer
+
+logger = logging.getLogger(__name__)
+
+class PluginManager:
+    def __init__(self, plugin_dir: str):
+        self.plugin_dir = Path(plugin_dir)
+        self.loaded_plugins: List[Plugin] = []
+        self.rules: List[Rule] = []
+        self.analyzers: List[Analyzer] = []
+
+    def load_plugins(self, engine_context=None):
+        """
+        Scans plugin_dir for .py files and loads them.
+        """
+        if not self.plugin_dir.exists():
+            logger.warning(f"Plugin directory {self.plugin_dir} does not exist.")
+            return
+
+        logger.info(f"Scanning for plugins in {self.plugin_dir}")
+        # Allow plugins to import sibling modules from the plugin directory.
+        sys.path.insert(0, str(self.plugin_dir))
+
+        for plugin_file in self.plugin_dir.glob("*.py"):
+            if plugin_file.name.startswith("__"):
+                continue
+
+            try:
+                module_name = plugin_file.stem
+                spec = importlib.util.spec_from_file_location(module_name, plugin_file)
+                if not spec or not spec.loader:
+                    continue
+
+                module = importlib.util.module_from_spec(spec)
+                spec.loader.exec_module(module)
+
+                # Instantiate and register any classes implementing the Plugin interface.
+                found_plugin = False
+                for name, obj in inspect.getmembers(module):
+                    if inspect.isclass(obj) and issubclass(obj, Plugin) and obj is not Plugin:
+                        try:
+                            plugin_instance = obj()
+                            plugin_instance.register(self)
+                            self.loaded_plugins.append(plugin_instance)
+                            found_plugin = True
+                            logger.info(f"Loaded plugin: {name}")
+                        except Exception as e:
+                            logger.error(f"Error registering plugin {name}: {e}")
+
+                if not found_plugin:
+                    # Fallback: accept a module-level 'register' function instead.
+                    if hasattr(module, 'register') and callable(module.register):
+                        try:
+                            module.register(self)
+                            logger.info(f"Loaded plugin from module: {module_name}")
+                        except Exception as e:
+                            logger.error(f"Error executing register() in {module_name}: {e}")
+
+            except Exception as e:
+                logger.error(f"Failed to load plugin {plugin_file.name}: {e}")
+
+    def register_rule(self, rule: Rule):
+        self.rules.append(rule)
+        logger.debug(f"Registered rule: {rule.id}")
+
+    def register_analyzer(self, analyzer: Analyzer):
+        self.analyzers.append(analyzer)
+        logger.debug(f"Registered analyzer: {analyzer.id}")
+
+# No module-level default manager is exposed; the CLI constructs its own instance.
diff --git a/codesage/core/interfaces.py b/codesage/core/interfaces.py
new file mode 100644
index 0000000..22be6f9
--- /dev/null
+++ b/codesage/core/interfaces.py
@@ -0,0 +1,53 @@
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel
+
+class CodeIssue(BaseModel):
+    """
+    Represents a generic issue found in code.
+    """
+    file_path: str
+    line_number: int
+    severity: str  # low, medium, high, error
+    description: str
+    rule_id: str
+    context: Optional[str] = None
+
+class Plugin(ABC):
+    """
+    Base interface for all plugins.
+    """
+    @abstractmethod
+    def register(self, engine: Any) -> None:
+        """
+        Registers the plugin components (rules, analyzers) with the engine.
+        """
+        pass
+
+class Rule(ABC):
+    """
+    Base interface for a static analysis rule.
+    """
+    id: str
+    description: str
+    severity: str = "medium"
+
+    @abstractmethod
+    def check(self, file_path: str, content: str, context: Dict[str, Any]) -> List[CodeIssue]:
+        """
+        Checks the file content for violations of this rule.
+        """
+        pass
+
+class Analyzer(ABC):
+    """
+    Base interface for a custom analyzer (e.g., checking broader scope than a single file).
+    """
+    id: str
+
+    @abstractmethod
+    def analyze(self, project_path: str) -> List[CodeIssue]:
+        """
+        Analyzes the project to find issues.
+        """
+        pass
diff --git a/codesage/history/models.py b/codesage/history/models.py
index 7fab0ef..fb9c8cb 100644
--- a/codesage/history/models.py
+++ b/codesage/history/models.py
@@ -1,28 +1,58 @@
 from datetime import datetime
 from typing import List, Optional
-from pydantic import BaseModel, Field
+from sqlalchemy import Column, Integer, String, ForeignKey, Text, DateTime, JSON
+from sqlalchemy.orm import relationship, declarative_base
+from sqlalchemy.sql import func
-from codesage.snapshot.models import ProjectSnapshot
+Base = declarative_base()
+
+class Project(Base):
+    __tablename__ = 'projects'
-class SnapshotMeta(BaseModel):
-    """Metadata for a single snapshot."""
-    project_name: str
-    snapshot_id: str  # e.g., commit hash or timestamp
-    created_at: datetime = Field(default_factory=datetime.utcnow)
-    branch: Optional[str] = None
-    commit: Optional[str] = None
-    trigger: Optional[str] = None  # e.g., 'ci', 'manual'
+    id = Column(Integer, primary_key=True)
+    name = Column(String(255), unique=True, nullable=False)
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    snapshots = relationship("Snapshot", back_populates="project", cascade="all, delete-orphan")
-class HistoricalSnapshot(BaseModel):
-    """A snapshot with its metadata."""
-    meta: SnapshotMeta
-    snapshot: ProjectSnapshot
+class Snapshot(Base):
+    __tablename__ = 'snapshots'
+    id = Column(Integer, primary_key=True)
+    project_id = Column(Integer, ForeignKey('projects.id'), nullable=False)
+    timestamp = Column(DateTime(timezone=True), default=func.now())
+    commit_hash = Column(String(40))
+    branch = Column(String(255))
+    risk_score = Column(Integer, default=0)
-class SnapshotIndex(BaseModel):
-    """An index of all snapshots for a project."""
-    project_name: str
-    items: List[SnapshotMeta] = Field(default_factory=list)
+    # Store raw metrics as JSON if structure varies or for flexibility
+    metrics = Column(JSON, nullable=True)
+
+    project = relationship("Project", back_populates="snapshots")
+    issues = relationship("Issue", back_populates="snapshot", cascade="all, delete-orphan")
+    dependencies = relationship("Dependency", back_populates="snapshot", cascade="all, delete-orphan")
+
+class Issue(Base):
+    __tablename__ = 'issues'
+
+    id = Column(Integer, primary_key=True)
+    snapshot_id = Column(Integer, ForeignKey('snapshots.id'), nullable=False)
+    file_path = Column(String(512), nullable=False)
+    line_number = Column(Integer)
+    severity = Column(String(20))  # LOW, MEDIUM, HIGH, ERROR
+    rule_id = Column(String(100))
+    description = Column(Text)
+
+    snapshot = relationship("Snapshot", back_populates="issues")
+
+class Dependency(Base):
+    __tablename__ = 'dependencies'
+
+    id = Column(Integer, primary_key=True)
+    snapshot_id = Column(Integer, ForeignKey('snapshots.id'), nullable=False)
+    source_file = Column(String(512))
+    target_file = Column(String(512))
+    type = Column(String(50))  # import, call, inheritance
+
+    snapshot = relationship("Snapshot", back_populates="dependencies")
diff --git a/codesage/history/store.py b/codesage/history/store.py
index fc35c41..4a865f3 100644
--- a/codesage/history/store.py
+++ b/codesage/history/store.py
@@ -1,72 +1,155 @@
-from pathlib import Path
-from typing import Optional
-
-import yaml
-
-from codesage.config.history import HistoryConfig
-from codesage.history.models import HistoricalSnapshot, SnapshotIndex, SnapshotMeta
+import logging
+from typing import Optional, List
+from sqlalchemy import create_engine, select, desc
+from sqlalchemy.orm import sessionmaker, Session
+from codesage.history.models import Base, Project, Snapshot, Issue, Dependency
 from codesage.snapshot.models import ProjectSnapshot
+logger = logging.getLogger(__name__)
+
+class StorageEngine:
+    def __init__(self, db_url: str = "sqlite:///codesage.db"):
+        self.engine = create_engine(db_url)
+        Base.metadata.create_all(self.engine)
+        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
+
+    def get_session(self) -> Session:
+        return self.SessionLocal()
+
+    def save_snapshot(self, project_name: str, snapshot_data: ProjectSnapshot) -> Snapshot:
+        """
+        Saves a ProjectSnapshot to the database.
+        """
+        session = self.get_session()
+        try:
+            # Get or create the Project row.
+            project = session.execute(select(Project).where(Project.name == project_name)).scalar_one_or_none()
+            if not project:
+                project = Project(name=project_name)
+                session.add(project)
+                session.commit()
+                session.refresh(project)
+
+            # Create the Snapshot record. The Pydantic metadata exposes
+            # `git_commit`; the ORM column is named `commit_hash`.
+            meta = snapshot_data.metadata
+            commit_hash = getattr(meta, 'git_commit', None)
+
+            db_snapshot = Snapshot(
+                project_id=project.id,
+                commit_hash=commit_hash,
+                branch=None,  # SnapshotMetadata does not carry a branch yet
+                # risk_summary has no single score field; use avg_risk * 100 as a proxy.
+                risk_score=int(snapshot_data.risk_summary.avg_risk * 100) if snapshot_data.risk_summary else 0,
+                # Store the remaining top-level metrics as JSON for flexibility.
+                metrics=snapshot_data.model_dump(mode='json', exclude={'files', 'issues_summary', 'risk_summary', 'metadata'})
+            )
+            session.add(db_snapshot)
+            session.commit()
+            session.refresh(db_snapshot)
+
+            # Persist per-file issues: ProjectSnapshot.files is a Dict[str, FileSnapshot],
+            # and each FileSnapshot carries its own `issues` list.
+            for file_path, file_snapshot in snapshot_data.files.items():
+                if file_snapshot.issues:
+                    for issue in file_snapshot.issues:
+                        db_issue = Issue(
+                            snapshot_id=db_snapshot.id,
+                            file_path=file_path,
+                            # snapshot.models.Issue keeps the line inside `location`;
+                            # fall back to a flat `line` attribute for simpler issue objects.
+                            line_number=getattr(getattr(issue, 'location', None), 'line', None) or getattr(issue, 'line', None),
+                            severity=issue.severity,
+                            rule_id=getattr(issue, 'rule_id', None) or getattr(issue, 'category', 'unknown'),
+                            description=issue.message
+                        )
+                        session.add(db_issue)
+
+            session.commit()
+
+            # Load what callers need (generated columns via refresh, plus the
+            # `project` relationship), then detach the instance so it remains
+            # readable after the session closes.
+            session.refresh(db_snapshot)
+            _ = db_snapshot.project
+            session.expunge(db_snapshot)
+            return db_snapshot
+        except Exception as e:
+            session.rollback()
+            logger.error(f"Failed to save snapshot: {e}")
+            raise
+        finally:
+            session.close()
+
+    def get_latest_snapshot(self, project_name: str) -> Optional[Snapshot]:
+        session = self.get_session()
+        try:
+            project = session.execute(select(Project).where(Project.name == project_name)).scalar_one_or_none()
+            if not project:
+                return None
+
+            stmt = select(Snapshot).where(Snapshot.project_id == project.id).order_by(desc(Snapshot.timestamp)).limit(1)
+            return session.execute(stmt).scalar_one_or_none()
+        finally:
+            session.close()
+
+    def get_history(self, project_name: str, limit: int = 10) -> List[Snapshot]:
+        session = self.get_session()
+        try:
+            project = session.execute(select(Project).where(Project.name == project_name)).scalar_one_or_none()
+            if not project:
+                return []
+
+            stmt = select(Snapshot).where(Snapshot.project_id == project.id).order_by(desc(Snapshot.timestamp)).limit(limit)
+            return session.execute(stmt).scalars().all()
+        finally:
+            session.close()
+
+# Module-level helpers for callers that want a shared default engine rather
+# than constructing their own StorageEngine (replaces the YAML-based helpers
+# removed below).
+
+_engine: Optional[StorageEngine] = None
+
+def init_storage(db_url: str):
+    global _engine
+    _engine = StorageEngine(db_url)
+
+def get_storage() -> StorageEngine:
+    global _engine
+    if _engine is None:
+        _engine = StorageEngine()  # defaults to the local SQLite database
+    return _engine
-def save_historical_snapshot(root: Path, hs: HistoricalSnapshot, config: HistoryConfig) -> None:
-    """Saves a historical snapshot and updates the index."""
-    project_dir = root / hs.meta.project_name
-    project_dir.mkdir(parents=True, exist_ok=True)
-
-    snapshot_file = project_dir / f"{hs.meta.snapshot_id}.yaml"
-
-    # Use Pydantic's `model_dump` for v2, which is equivalent to `dict` in v1
-    data = hs.snapshot.model_dump(mode='json')
-
-    with snapshot_file.open("w", encoding="utf-8") as f:
-        yaml.safe_dump(data, f, sort_keys=False, allow_unicode=True)
-
-    update_snapshot_index(root, hs.meta, config.max_snapshots)
-
-
-def load_historical_snapshot(root: Path, project: str, snapshot_id: str) -> HistoricalSnapshot:
-    """Loads a historical snapshot."""
-    snapshot_file = root / project / f"{snapshot_id}.yaml"
-    with snapshot_file.open("r", encoding="utf-8") as f:
-        raw_snapshot = yaml.safe_load(f)
-
-    snapshot = ProjectSnapshot.model_validate(raw_snapshot)
-
-    index = load_snapshot_index(root, project)
-    meta = next((m for m in index.items if m.snapshot_id == snapshot_id), None)
-
-    if not meta:
-        raise FileNotFoundError(f"Snapshot metadata for id {snapshot_id} not found in index.")
-
-    return HistoricalSnapshot(meta=meta, snapshot=snapshot)
-
-
-def load_snapshot_index(root: Path, project: str) -> SnapshotIndex:
-    """Loads the snapshot index for a project."""
-    index_file = root / project / "index.yaml"
-    if not index_file.exists():
-        return SnapshotIndex(project_name=project, items=[])
-
-    with index_file.open("r", encoding="utf-8") as f:
-        raw = yaml.safe_load(f)
-
-    return SnapshotIndex.model_validate(raw)
-
-
-def update_snapshot_index(root: Path, meta: SnapshotMeta, max_snapshots: int) -> None:
-    """Updates the snapshot index for a project."""
-    index_file = root / meta.project_name / "index.yaml"
-
-    index = load_snapshot_index(root, meta.project_name)
-
-    # Avoid adding duplicate entries
-    index.items = [item for item in index.items if item.snapshot_id != meta.snapshot_id]
-
-    index.items.append(meta)
-    index.items.sort(key=lambda m: m.created_at, reverse=True)
-
-    if max_snapshots > 0:
-        index.items = index.items[:max_snapshots]
-
-    with index_file.open("w", encoding="utf-8") as f:
-        yaml.safe_dump(index.model_dump(mode='json'), f, sort_keys=False, allow_unicode=True)
diff --git a/examples/plugins/my_custom_rule.py b/examples/plugins/my_custom_rule.py
new file mode 100644
index 0000000..ec06e7c
--- /dev/null
+++ b/examples/plugins/my_custom_rule.py
@@ -0,0 +1,25 @@
+from typing import List, Dict, Any
+from codesage.core.interfaces import Plugin, Rule, CodeIssue
+
+class NoPrintRule(Rule):
+    id = "custom-no-print"
+    description = "Avoid using print() in production code."
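+    # Note: scan.py maps "medium" (and any unlisted value) to its "warning" severity.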
+ severity = "medium" + + def check(self, file_path: str, content: str, context: Dict[str, Any]) -> List[CodeIssue]: + issues = [] + lines = content.split('\n') + for i, line in enumerate(lines): + if "print(" in line and not line.strip().startswith("#"): + issues.append(CodeIssue( + file_path=file_path, + line_number=i + 1, + severity=self.severity, + description=self.description, + rule_id=self.id + )) + return issues + +class MyCustomPlugin(Plugin): + def register(self, engine): + engine.register_rule(NoPrintRule()) diff --git a/poetry.lock b/poetry.lock index 2965ad7..ad972cb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,26 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +[[package]] +name = "alembic" +version = "1.17.2" +description = "A database migration tool for SQLAlchemy." +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "alembic-1.17.2-py3-none-any.whl", hash = "sha256:f483dd1fe93f6c5d49217055e4d15b905b425b6af906746abb35b69c1996c4e6"}, + {file = "alembic-1.17.2.tar.gz", hash = "sha256:bbe9751705c5e0f14877f02d46c53d10885e377e3d90eda810a016f9baa19e8e"}, +] + +[package.dependencies] +Mako = "*" +SQLAlchemy = ">=1.4.0" +tomli = {version = "*", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.12" + +[package.extras] +tz = ["tzdata"] + [[package]] name = "annotated-types" version = "0.7.0" @@ -920,6 +941,26 @@ html-clean = ["lxml_html_clean"] html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] +[[package]] +name = "mako" +version = "1.3.10" +description = "A super-fast templating language that borrows the best ideas from the existing templating languages." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"}, + {file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"}, +] + +[package.dependencies] +MarkupSafe = ">=0.9.2" + +[package.extras] +babel = ["Babel"] +lingua = ["lingua"] +testing = ["pytest"] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1889,6 +1930,102 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.44" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "SQLAlchemy-2.0.44-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:471733aabb2e4848d609141a9e9d56a427c0a038f4abf65dd19d7a21fd563632"}, + {file = "SQLAlchemy-2.0.44-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48bf7d383a35e668b984c805470518b635d48b95a3c57cb03f37eaa3551b5f9f"}, + {file = "SQLAlchemy-2.0.44-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf4bb6b3d6228fcf3a71b50231199fb94d2dd2611b66d33be0578ea3e6c2726"}, + {file = "SQLAlchemy-2.0.44-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:e998cf7c29473bd077704cea3577d23123094311f59bdc4af551923b168332b1"}, + {file = "SQLAlchemy-2.0.44-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ebac3f0b5732014a126b43c2b7567f2f0e0afea7d9119a3378bde46d3dcad88e"}, + {file = "SQLAlchemy-2.0.44-cp37-cp37m-win32.whl", hash = "sha256:3255d821ee91bdf824795e936642bbf43a4c7cedf5d1aed8d24524e66843aa74"}, + {file = "SQLAlchemy-2.0.44-cp37-cp37m-win_amd64.whl", hash = 
"sha256:78e6c137ba35476adb5432103ae1534f2f5295605201d946a4198a0dea4b38e7"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c77f3080674fc529b1bd99489378c7f63fcb4ba7f8322b79732e0258f0ea3ce"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c26ef74ba842d61635b0152763d057c8d48215d5be9bb8b7604116a059e9985"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4a172b31785e2f00780eccab00bc240ccdbfdb8345f1e6063175b3ff12ad1b0"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9480c0740aabd8cb29c329b422fb65358049840b34aba0adf63162371d2a96e"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:17835885016b9e4d0135720160db3095dc78c583e7b902b6be799fb21035e749"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cbe4f85f50c656d753890f39468fcd8190c5f08282caf19219f684225bfd5fd2"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-win32.whl", hash = "sha256:2fcc4901a86ed81dc76703f3b93ff881e08761c63263c46991081fd7f034b165"}, + {file = "sqlalchemy-2.0.44-cp310-cp310-win_amd64.whl", hash = "sha256:9919e77403a483ab81e3423151e8ffc9dd992c20d2603bf17e4a8161111e55f5"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe3917059c7ab2ee3f35e77757062b1bea10a0b6ca633c58391e3f3c6c488dd"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de4387a354ff230bc979b46b2207af841dc8bf29847b6c7dbe60af186d97aefa"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3678a0fb72c8a6a29422b2732fe423db3ce119c34421b5f9955873eb9b62c1e"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cf6872a23601672d61a68f390e44703442639a12ee9dd5a88bbce52a695e46e"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:329aa42d1be9929603f406186630135be1e7a42569540577ba2c69952b7cf399"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:70e03833faca7166e6a9927fbee7c27e6ecde436774cd0b24bbcc96353bce06b"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-win32.whl", hash = "sha256:253e2f29843fb303eca6b2fc645aca91fa7aa0aa70b38b6950da92d44ff267f3"}, + {file = "sqlalchemy-2.0.44-cp311-cp311-win_amd64.whl", hash = "sha256:7a8694107eb4308a13b425ca8c0e67112f8134c846b6e1f722698708741215d5"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72fea91746b5890f9e5e0997f16cbf3d53550580d76355ba2d998311b17b2250"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:585c0c852a891450edbb1eaca8648408a3cc125f18cf433941fa6babcc359e29"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b94843a102efa9ac68a7a30cd46df3ff1ed9c658100d30a725d10d9c60a2f44"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:119dc41e7a7defcefc57189cfa0e61b1bf9c228211aba432b53fb71ef367fda1"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0765e318ee9179b3718c4fd7ba35c434f4dd20332fbc6857a5e8df17719c24d7"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e7b5b079055e02d06a4308d0481658e4f06bc7ef211567edc8f7d5dce52018d"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-win32.whl", hash = 
"sha256:846541e58b9a81cce7dee8329f352c318de25aa2f2bbe1e31587eb1f057448b4"}, + {file = "sqlalchemy-2.0.44-cp312-cp312-win_amd64.whl", hash = "sha256:7cbcb47fd66ab294703e1644f78971f6f2f1126424d2b300678f419aa73c7b6e"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ff486e183d151e51b1d694c7aa1695747599bb00b9f5f604092b54b74c64a8e1"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b1af8392eb27b372ddb783b317dea0f650241cea5bd29199b22235299ca2e45"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b61188657e3a2b9ac4e8f04d6cf8e51046e28175f79464c67f2fd35bceb0976"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b87e7b91a5d5973dda5f00cd61ef72ad75a1db73a386b62877d4875a8840959c"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15f3326f7f0b2bfe406ee562e17f43f36e16167af99c4c0df61db668de20002d"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e77faf6ff919aa8cd63f1c4e561cac1d9a454a191bb864d5dd5e545935e5a40"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-win32.whl", hash = "sha256:ee51625c2d51f8baadf2829fae817ad0b66b140573939dd69284d2ba3553ae73"}, + {file = "sqlalchemy-2.0.44-cp313-cp313-win_amd64.whl", hash = "sha256:c1c80faaee1a6c3428cecf40d16a2365bcf56c424c92c2b6f0f9ad204b899e9e"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2fc44e5965ea46909a416fff0af48a219faefd5773ab79e5f8a5fcd5d62b2667"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dc8b3850d2a601ca2320d081874033684e246d28e1c5e89db0864077cfc8f5a9"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d733dec0614bb8f4bcb7c8af88172b974f685a31dc3a65cca0527e3120de5606"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22be14009339b8bc16d6b9dc8780bacaba3402aa7581658e246114abbd2236e3"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:357bade0e46064f88f2c3a99808233e67b0051cdddf82992379559322dfeb183"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4848395d932e93c1595e59a8672aa7400e8922c39bb9b0668ed99ac6fa867822"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-win32.whl", hash = "sha256:2f19644f27c76f07e10603580a47278abb2a70311136a7f8fd27dc2e096b9013"}, + {file = "sqlalchemy-2.0.44-cp38-cp38-win_amd64.whl", hash = "sha256:1df4763760d1de0dfc8192cc96d8aa293eb1a44f8f7a5fbe74caf1b551905c5e"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7027414f2b88992877573ab780c19ecb54d3a536bef3397933573d6b5068be4"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fe166c7d00912e8c10d3a9a0ce105569a31a3d0db1a6e82c4e0f4bf16d5eca9"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3caef1ff89b1caefc28f0368b3bde21a7e3e630c2eddac16abd9e47bd27cc36a"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc2856d24afa44295735e72f3c75d6ee7fdd4336d8d3a8f3d44de7aa6b766df2"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:11bac86b0deada30b6b5f93382712ff0e911fe8d31cb9bf46e6b149ae175eff0"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:4d18cd0e9a0f37c9f4088e50e3839fcb69a380a0ec957408e0b57cff08ee0a26"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-win32.whl", hash = "sha256:9e9018544ab07614d591a26c1bd4293ddf40752cc435caf69196740516af7100"}, + {file = "sqlalchemy-2.0.44-cp39-cp39-win_amd64.whl", hash = "sha256:8e0e4e66fd80f277a8c3de016a81a554e76ccf6b8d881ee0b53200305a8433f6"}, + {file = "sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05"}, + {file = "sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22"}, +] + +[package.dependencies] +greenlet = {version = ">=1", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (>=1)"] +aioodbc = ["aioodbc", "greenlet (>=1)"] +aiosqlite = ["aiosqlite", "greenlet (>=1)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (>=1)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (>=1)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (>=1)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + [[package]] name = "starlette" version = "0.27.0" @@ -2274,4 +2411,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "47eeb507e0337b802684ac2787b3ea877ec7f6acc09dc23b2ba8416b9d0a9ec2" +content-hash = "f13fa092b27845b737b031954ecdc17d5a01b2edc4c94ece037bf4a98d8e0fe6" diff --git a/pyproject.toml b/pyproject.toml index 5cdcff5..7136bd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,8 @@ tree-sitter-bash = "^0.25.0" tiktoken = "^0.12.0" python-multipart = "^0.0.20" playwright = "^1.56.0" +sqlalchemy = "^2.0.44" +alembic = "^1.17.2" [tool.poetry.dev-dependencies] diff --git a/tests/unit/core/test_plugin_loader.py b/tests/unit/core/test_plugin_loader.py new file mode 100644 index 0000000..a58f8c7 --- /dev/null +++ b/tests/unit/core/test_plugin_loader.py @@ -0,0 +1,39 @@ +import os +import pytest +from codesage.cli.plugin_loader import PluginManager +from codesage.core.interfaces import Plugin, Rule, CodeIssue + +class MockRule(Rule): + id = "mock-rule" + description = "Mock rule" + + def check(self, file_path, content, context): + return [] + +class MockPlugin(Plugin): + def register(self, engine): + engine.register_rule(MockRule()) + +def test_plugin_manager_load(tmp_path): + # Create a dummy plugin file + plugin_file = tmp_path / "test_plugin.py" + content = """ +from codesage.core.interfaces import Plugin, Rule + +class TestRule(Rule): + id = "test-rule" + description = "Test rule" + def check(self, f, c, ctx): return [] + +class TestPlugin(Plugin): + def register(self, engine): + engine.register_rule(TestRule()) +""" + 
plugin_file.write_text(content) + + manager = PluginManager(str(tmp_path)) + manager.load_plugins() + + assert len(manager.loaded_plugins) == 1 + assert len(manager.rules) == 1 + assert manager.rules[0].id == "test-rule" diff --git a/tests/unit/history/test_storage_engine.py b/tests/unit/history/test_storage_engine.py new file mode 100644 index 0000000..26118d1 --- /dev/null +++ b/tests/unit/history/test_storage_engine.py @@ -0,0 +1,88 @@ +import os +import tempfile +import pytest +from sqlalchemy import create_engine, inspect +from codesage.history.store import StorageEngine +from codesage.history.models import Project, Snapshot, Issue +from codesage.snapshot.models import ProjectSnapshot, ProjectRiskSummary, ProjectIssuesSummary, SnapshotMetadata + +@pytest.fixture +def db_url(): + _, path = tempfile.mkstemp() + url = f"sqlite:///{path}" + yield url + os.remove(path) + +def test_storage_engine_creation(db_url): + engine = StorageEngine(db_url) + assert engine is not None + + # Verify tables exist + inspector = inspect(engine.engine) + tables = inspector.get_table_names() + assert "projects" in tables + assert "snapshots" in tables + assert "issues" in tables + +def test_save_and_retrieve_snapshot(db_url): + storage = StorageEngine(db_url) + + # Create a dummy ProjectSnapshot + metadata = SnapshotMetadata.model_construct( + project_name="test_project", + git_commit="abc1234" + ) + snapshot = ProjectSnapshot.model_construct( + metadata=metadata, + files={}, + # risk_summary uses avg_risk now + risk_summary=ProjectRiskSummary.model_construct(avg_risk=0.5, high_risk_files=1), + issues_summary=ProjectIssuesSummary.model_construct(total=0, by_severity={}) + ) + + # Save + db_snapshot = storage.save_snapshot("test_project", snapshot) + assert db_snapshot.id is not None + assert db_snapshot.project.name == "test_project" + + # Retrieve + retrieved = storage.get_latest_snapshot("test_project") + assert retrieved is not None + assert retrieved.id == db_snapshot.id + assert retrieved.commit_hash == "abc1234" + +def test_save_snapshot_with_issues(db_url): + storage = StorageEngine(db_url) + + metadata = SnapshotMetadata.model_construct(project_name="issue_project", git_commit="def5678") + + # Mock file with issues + # We need to mock the structure expected by store.py + # store.py expects snapshot.files to be a dict where values have 'issues' attribute + + class MockIssue: + def __init__(self, line, severity, message): + self.line = line + self.severity = severity + self.message = message + self.category = "test-rule" + + class MockFileSnapshot: + def __init__(self): + self.issues = [MockIssue(10, "high", "Fix me")] + + # Use model_construct to bypass validation for missing fields + snapshot_constructed = ProjectSnapshot.model_construct( + metadata=metadata, + files={"main.py": MockFileSnapshot()}, + risk_summary=ProjectRiskSummary.model_construct(avg_risk=0.1, high_risk_files=1), + issues_summary=ProjectIssuesSummary.model_construct(total=1, by_severity={'high': 1}) + ) + + db_snapshot = storage.save_snapshot("issue_project", snapshot_constructed) + + session = storage.get_session() + issues = session.query(Issue).filter_by(snapshot_id=db_snapshot.id).all() + assert len(issues) == 1 + assert issues[0].file_path == "main.py" + assert issues[0].description == "Fix me"
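
A minimal usage sketch (not part of the diff) of how the new plugin loader and storage engine compose outside the CLI, assuming the module paths introduced above; "examples/plugins", "main.py", and "my_project" are placeholder names:

    from codesage.cli.plugin_loader import PluginManager
    from codesage.history.store import StorageEngine

    # Discover plugins (e.g. the NoPrintRule example) and collect their rules.
    manager = PluginManager("examples/plugins")
    manager.load_plugins()

    # Run every registered rule against one file's contents.
    source = open("main.py", encoding="utf-8").read()
    for rule in manager.rules:
        for issue in rule.check("main.py", source, {}):
            print(f"{issue.file_path}:{issue.line_number} [{issue.severity}] {issue.description}")

    # Query persisted history; StorageEngine creates its tables on first use.
    storage = StorageEngine("sqlite:///codesage.db")
    latest = storage.get_latest_snapshot("my_project")
    recent = storage.get_history("my_project", limit=5)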