turtacn · turtacn · Nov 23, 2025 · Nov 23, 2025
diff --git a/alembic.ini b/alembic.ini
@@ -0,0 +1,147 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts.
+# this is typically a path given in POSIX (e.g. forward slashes)
+# format, relative to the token %(here)s which refers to the location of this
+# ini file
+script_location = %(here)s/alembic
+
+# template used to generate migration file names; the default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.  for multiple paths, the path separator
+# is defined by "path_separator" below.
+prepend_sys_path = .
+
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the tzdata library which can be installed by adding
+# `alembic[tz]` to the pip requirements.
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; this defaults
+# to <script_location>/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "path_separator"
+# below.
+# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
+
+# path_separator; this indicates what character is used to split lists of file
+# paths, including version_locations and prepend_sys_path within configparser
+# files such as alembic.ini.
+# The default rendered in new alembic.ini files is "os", which uses os.pathsep
+# to provide os-dependent path splitting.
+#
+# Note that in order to support legacy alembic.ini files, this default does NOT
+# take place if path_separator is not present in alembic.ini.  If this
+# option is omitted entirely, fallback logic is as follows:
+#
+# 1. Parsing of the version_locations option falls back to using the legacy
+#    "version_path_separator" key, which if absent then falls back to the legacy
+#    behavior of splitting on spaces and/or commas.
+# 2. Parsing of the prepend_sys_path option falls back to the legacy
+#    behavior of splitting on spaces, commas, or colons.
+#
+# Valid values for path_separator are:
+#
+# path_separator = :
+# path_separator = ;
+# path_separator = space
+# path_separator = newline
+#
+# Use os.pathsep. Default configuration used for new projects.
+path_separator = os
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# database URL.  This is consumed by the user-maintained env.py script only.
+# other means of configuring database URLs may be customized within the env.py
+# file.
+sqlalchemy.url = sqlite:///codesage.db
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
+# hooks = ruff
+# ruff.type = module
+# ruff.module = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Alternatively, use the exec runner to execute a binary found on your PATH
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration.  This is also consumed by the user-maintained
+# env.py script only.
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/README b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
diff --git a/alembic/env.py b/alembic/env.py
@@ -0,0 +1,85 @@
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+import os
+import sys
+
+# Add project root to path
+sys.path.insert(0, os.getcwd())
+
+from codesage.history.models import Base
+
+# The Alembic Config object, which provides access to the values within
+# the .ini file in use.
+config = context.config
+
+# Configure Python logging from the ini file, when a config file name
+# is available.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# MetaData object used for 'autogenerate' support; taken from the Base
+# imported from codesage.history.models.
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection, target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
@@ -0,0 +1,28 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    ${downgrades if downgrades else "pass"}
diff --git a/codesage/cli/commands/scan.py b/codesage/cli/commands/scan.py
@@ -7,8 +7,11 @@
 from codesage.semantic_digest.python_snapshot_builder import PythonSemanticSnapshotBuilder, SnapshotConfig
 from codesage.semantic_digest.go_snapshot_builder import GoSemanticSnapshotBuilder
 from codesage.semantic_digest.shell_snapshot_builder import ShellSemanticSnapshotBuilder
-from codesage.snapshot.models import ProjectSnapshot
+from codesage.snapshot.models import ProjectSnapshot, Issue, IssueLocation
 from codesage.reporters import ConsoleReporter, JsonReporter, GitHubPRReporter
+from codesage.cli.plugin_loader import PluginManager
+from codesage.history.store import StorageEngine
+from codesage.core.interfaces import CodeIssue
 
 def get_builder(language: str, path: Path):
     config = SnapshotConfig()
@@ -28,11 +31,25 @@ def get_builder(language: str, path: Path):
 @click.option('--output', '-o', help='Output path for JSON reporter.')
 @click.option('--fail-on-high', is_flag=True, help='Exit with non-zero code if high severity issues are found.')
 @click.option('--ci-mode', is_flag=True, help='Enable CI mode (auto-detect GitHub environment).')
+@click.option('--plugins-dir', default='.codesage/plugins', help='Directory containing plugins.')
+@click.option('--db-url', default='sqlite:///codesage.db', help='Database URL for storage.')
 @click.pass_context
-def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode):
+def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode, plugins_dir, db_url):
     """
     Scan the codebase and report issues.
     """
+    # 1. Initialize Database
+    try:
+        storage = StorageEngine(db_url)
+        click.echo(f"Connected to storage: {db_url}")
+    except Exception as e:
+        click.echo(f"Warning: Could not connect to storage: {e}", err=True)
+        storage = None
+
+    # 2. Load Plugins
+    plugin_manager = PluginManager(plugins_dir)
+    plugin_manager.load_plugins()
+
     click.echo(f"Scanning {path} for {language}...")
 
     root_path = Path(path)
@@ -44,6 +61,57 @@ def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode):
 
     try:
         snapshot: ProjectSnapshot = builder.build()
+
+        # 3. Apply Custom Rules (Plugins)
+        for rule in plugin_manager.rules:
+            for file_path, file_snapshot in snapshot.files.items():
+                try:
+                    content = ""
+                    full_path = root_path / file_path
+                    if full_path.exists():
+                        content = full_path.read_text(errors='ignore')
+
+                    issues = rule.check(str(file_path), content, {})
+                    if issues:
+                        for i in issues:
+                            # Convert plugin CodeIssue to standard Issue model
+
+                            # Map severity to Issue severity Literal
+                            severity = "warning"
+                            if i.severity.lower() in ["info", "warning", "error"]:
+                                severity = i.severity.lower()
+                            elif i.severity.lower() == "high":
+                                severity = "error"
+                            elif i.severity.lower() == "low":
+                                severity = "info"
+
+                            new_issue = Issue(
+                                rule_id=rule.id,
+                                severity=severity,
+                                message=i.description,
+                                location=IssueLocation(
+                                    file_path=str(file_path),
+                                    line=i.line_number
+                                ),
+                                symbol=None,
+                                tags=["custom-rule"]
+                            )
+
+                            if file_snapshot.issues is None:
+                                file_snapshot.issues = []
+                            file_snapshot.issues.append(new_issue)
+
+                except Exception as e:
+                     click.echo(f"Error running rule {rule.id} on {file_path}: {e}", err=True)
+
+        # 4. Save to Storage
+        if storage:
+            try:
+                storage.save_snapshot(snapshot.metadata.project_name, snapshot)
+                click.echo("Snapshot saved to database.")
+            except Exception as e:
+                 click.echo(f"Failed to save snapshot: {e}", err=True)
+
     except Exception as e:
         click.echo(f"Scan failed: {e}", err=True)
         ctx.exit(1)