From 9ad483000176ed3f6f970203257b95460759b834 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 19:20:28 +0000 Subject: [PATCH 01/39] Add Autopopulate 2.0 specification document Design specification for issue #1243 proposing: - Per-table jobs tables with native primary keys - Extended status values (pending, reserved, success, error, ignore) - Priority and scheduling support - Referential integrity via foreign keys - Automatic refresh on populate --- docs/src/design/autopopulate-2.0-spec.md | 527 +++++++++++++++++++++++ 1 file changed, 527 insertions(+) create mode 100644 docs/src/design/autopopulate-2.0-spec.md diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md new file mode 100644 index 000000000..6444b607e --- /dev/null +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -0,0 +1,527 @@ +# Autopopulate 2.0 Specification + +## Overview + +This specification redesigns the DataJoint job handling system to provide better visibility, control, and scalability for distributed computing workflows. The new system replaces the schema-level `~jobs` table with per-table job tables that offer richer status tracking, proper referential integrity, and dashboard-friendly monitoring. + +## Problem Statement + +### Current Jobs Table Limitations + +The existing `~jobs` table has significant limitations: + +1. **Limited status tracking**: Only supports `reserved`, `error`, and `ignore` statuses +2. **Functions as an error log**: Cannot efficiently track pending or completed jobs +3. **Poor dashboard visibility**: No way to monitor pipeline progress without querying multiple tables +4. **Key hashing obscures data**: Primary keys are stored as hashes, making debugging difficult +5. **No referential integrity**: Jobs table is independent of computed tables; orphaned jobs can accumulate + +### Key Source Limitations + +1. **Frequent manual modifications**: Subset operations require modifying `key_source` property +2. **Local visibility only**: Custom key sources are not accessible database-wide +3. **Performance bottleneck**: Multiple workers querying `key_source` simultaneously creates contention +4. **Codebase dependency**: Requires full pipeline codebase to determine pending work + +## Proposed Solution + +### Core Design Principles + +1. **Per-table jobs**: Each computed table gets its own hidden jobs table +2. **Native primary keys**: Jobs table uses the same primary key structure as its parent table (no hashes) +3. **Referential integrity**: Jobs are foreign-key linked to parent tables with cascading deletes +4. **Rich status tracking**: Extended status values for full lifecycle visibility +5. **Automatic refresh**: `populate()` automatically refreshes the jobs queue + +## Architecture + +### Jobs Table Structure + +Each `dj.Imported` or `dj.Computed` table `MyTable` will have an associated hidden jobs table `~my_table__jobs` with the following structure: + +``` +# Job queue for MyTable +-> ParentTable1 +-> ParentTable2 +... # Same primary key structure as MyTable +--- +status : enum('pending', 'reserved', 'success', 'error', 'ignore') +priority : int # Higher priority = processed first (default: 0) +scheduled_time : datetime # Process on or after this time (default: now) +reserved_time : datetime # When job was reserved (null if not reserved) +completed_time : datetime # When job completed (null if not completed) +duration : float # Execution duration in seconds (null if not completed) +error_message : varchar(2047) # Truncated error message +error_stack : mediumblob # Full error traceback +user : varchar(255) # Database user who reserved/completed job +host : varchar(255) # Hostname of worker +pid : int unsigned # Process ID of worker +connection_id : bigint unsigned # MySQL connection ID +version : varchar(255) # Code version (git hash, package version, etc.) +``` + +### Access Pattern + +Jobs are accessed as a property of the computed table: + +```python +# Current pattern (schema-level) +schema.jobs + +# New pattern (per-table) +MyTable.jobs + +# Examples +FilteredImage.jobs # Access jobs table +FilteredImage.jobs & 'status="error"' # Query errors +FilteredImage.jobs.refresh() # Refresh job queue +``` + +### Status Values + +| Status | Description | +|--------|-------------| +| `pending` | Job is queued and ready to be processed | +| `reserved` | Job is currently being processed by a worker | +| `success` | Job completed successfully | +| `error` | Job failed with an error | +| `ignore` | Job should be skipped (manually set) | + +### Status Transitions + +``` + ┌─────────────────────────────────────┐ + │ │ + ▼ │ +┌─────────┐ ┌──────────┐ ┌───────────┐ ┌────────┴──┐ +│ (none) │───▶│ pending │───▶│ reserved │───▶│ success │ +└─────────┘ └──────────┘ └───────────┘ └───────────┘ + │ │ │ + │ │ │ + │ ▼ ▼ + │ ┌──────────┐ ┌───────────┐ + └────────▶│ ignore │ │ error │───┐ + └──────────┘ └───────────┘ │ + ▲ │ │ + │ ▼ │ + │ ┌──────────┐ │ + └──────────│ pending │◀───┘ + └──────────┘ + (after reset) +``` + +## API Design + +### JobsTable Class + +```python +class JobsTable(Table): + """Hidden table managing job queue for a computed table.""" + + @property + def definition(self) -> str: + """Dynamically generated based on parent table's primary key.""" + ... + + def refresh(self, *restrictions) -> int: + """ + Refresh the jobs queue by scanning for missing entries. + + Computes: (key_source & restrictions) - target - jobs + Inserts new entries with status='pending'. + + Returns: + Number of new jobs added to queue. + """ + ... + + def reserve(self, key: dict) -> bool: + """ + Attempt to reserve a job for processing. + + Uses SELECT FOR UPDATE to prevent race conditions. + Only reserves jobs with status='pending' and scheduled_time <= now. + + Returns: + True if reservation successful, False if already taken. + """ + ... + + def complete(self, key: dict, duration: float = None) -> None: + """ + Mark a job as successfully completed. + + Updates status to 'success', records duration and completion time. + """ + ... + + def error(self, key: dict, error_message: str, error_stack: str = None) -> None: + """ + Mark a job as failed with error details. + + Updates status to 'error', records error message and stack trace. + """ + ... + + def ignore(self, key: dict) -> None: + """ + Mark a job to be ignored (skipped during populate). + """ + ... + + def reset(self, *restrictions, include_errors: bool = True) -> int: + """ + Reset jobs to pending status. + + Args: + restrictions: Conditions to filter which jobs to reset + include_errors: If True, also reset error jobs (default: True) + + Returns: + Number of jobs reset. + """ + ... + + def clear_completed(self, *restrictions, before: datetime = None) -> int: + """ + Remove completed jobs from the queue. + + Args: + restrictions: Conditions to filter which jobs to clear + before: Only clear jobs completed before this time + + Returns: + Number of jobs cleared. + """ + ... + + @property + def pending(self) -> QueryExpression: + """Return query for pending jobs.""" + return self & 'status="pending"' + + @property + def reserved(self) -> QueryExpression: + """Return query for reserved jobs.""" + return self & 'status="reserved"' + + @property + def errors(self) -> QueryExpression: + """Return query for error jobs.""" + return self & 'status="error"' + + @property + def completed(self) -> QueryExpression: + """Return query for completed jobs.""" + return self & 'status="success"' +``` + +### AutoPopulate Integration + +The `populate()` method is updated to use the new jobs table: + +```python +def populate( + self, + *restrictions, + suppress_errors: bool = False, + return_exception_objects: bool = False, + reserve_jobs: bool = False, + order: str = "original", + limit: int = None, + max_calls: int = None, + display_progress: bool = False, + processes: int = 1, + make_kwargs: dict = None, + # New parameters + priority: int = None, # Only process jobs with this priority or higher + refresh: bool = True, # Refresh jobs queue before populating +) -> dict: + """ + Populate the table by calling make() for each missing entry. + + New behavior with reserve_jobs=True: + 1. If refresh=True, calls self.jobs.refresh(*restrictions) + 2. Fetches jobs from self.jobs where status='pending' and scheduled_time <= now + 3. Reserves and processes jobs using the jobs table + 4. Records success/error status in jobs table + """ + ... +``` + +### Progress and Monitoring + +```python +# Current progress reporting +remaining, total = MyTable.progress() + +# Enhanced progress with jobs table +MyTable.jobs.progress() # Returns detailed status breakdown + +# Example output: +# { +# 'pending': 150, +# 'reserved': 3, +# 'success': 847, +# 'error': 12, +# 'ignore': 5, +# 'total': 1017 +# } +``` + +### Priority and Scheduling + +```python +# Set priority for specific jobs (higher = processed first) +MyTable.jobs.set_priority(restriction, priority=10) + +# Schedule jobs for future processing +from datetime import datetime, timedelta +future_time = datetime.now() + timedelta(hours=2) +MyTable.jobs.schedule(restriction, scheduled_time=future_time) + +# Insert with priority during refresh +MyTable.jobs.refresh(priority=5) # All new jobs get priority=5 +``` + +## Implementation Details + +### Table Naming Convention + +Jobs tables follow the existing hidden table naming pattern: +- Table `FilteredImage` (stored as `__filtered_image`) +- Jobs table: `~filtered_image__jobs` (stored as `_filtered_image__jobs`) + +### Referential Integrity + +The jobs table references the same parent tables as the computed table: + +```python +# If FilteredImage has definition: +@schema +class FilteredImage(dj.Computed): + definition = """ + -> Image + --- + filtered_image : + """ + +# The jobs table will have: +# -> Image (same foreign key reference) +# This ensures cascading deletes work correctly +``` + +### Cascading Behavior + +When a parent record is deleted: +1. The corresponding computed table record is deleted (existing behavior) +2. The corresponding jobs table record is also deleted (new behavior) + +This prevents orphaned job records. + +### Migration from Current System + +The schema-level `~jobs` table will be: +1. **Maintained** for backward compatibility during transition +2. **Deprecated** with warnings when `reserve_jobs=True` is used +3. **Migration utility** provided to convert existing jobs to new format + +```python +# Migration utility +schema.migrate_jobs() # Migrates ~jobs entries to per-table jobs tables +``` + +### Race Condition Handling + +Job reservation uses database-level locking to prevent race conditions: + +```sql +-- Reserve a job atomically +START TRANSACTION; +SELECT * FROM `_my_table__jobs` +WHERE status = 'pending' + AND scheduled_time <= NOW() +ORDER BY priority DESC, scheduled_time ASC +LIMIT 1 +FOR UPDATE SKIP LOCKED; + +-- If row found, update it +UPDATE `_my_table__jobs` +SET status = 'reserved', + reserved_time = NOW(), + user = CURRENT_USER(), + host = @@hostname, + pid = CONNECTION_ID() +WHERE ; + +COMMIT; +``` + +### Stale Job Detection + +Reserved jobs that have been running too long may indicate crashed workers: + +```python +# Find potentially stale jobs (reserved > 1 hour ago) +stale = MyTable.jobs & 'status="reserved"' & 'reserved_time < NOW() - INTERVAL 1 HOUR' + +# Reset stale jobs to pending +MyTable.jobs.reset(stale) +``` + +## Configuration Options + +New configuration settings for job management: + +```python +# In datajoint config +dj.config['jobs.auto_refresh'] = True # Auto-refresh on populate (default: True) +dj.config['jobs.keep_completed'] = False # Keep success records (default: False) +dj.config['jobs.stale_timeout'] = 3600 # Seconds before reserved job is stale (default: 3600) +dj.config['jobs.default_priority'] = 0 # Default priority for new jobs (default: 0) +``` + +## Usage Examples + +### Basic Distributed Computing + +```python +# Worker 1 +FilteredImage.populate(reserve_jobs=True) + +# Worker 2 (can run simultaneously) +FilteredImage.populate(reserve_jobs=True) + +# Monitor progress +print(FilteredImage.jobs.progress()) +``` + +### Priority-Based Processing + +```python +# Mark urgent jobs as high priority +urgent_subjects = Subject & 'priority="urgent"' +FilteredImage.jobs.set_priority(urgent_subjects, priority=100) + +# Workers will process high-priority jobs first +FilteredImage.populate(reserve_jobs=True) +``` + +### Scheduled Processing + +```python +# Schedule jobs for overnight processing +from datetime import datetime, timedelta + +tonight = datetime.now().replace(hour=22, minute=0, second=0) +FilteredImage.jobs.schedule('subject_id > 100', scheduled_time=tonight) + +# Only jobs scheduled for now or earlier will be processed +FilteredImage.populate(reserve_jobs=True) +``` + +### Error Recovery + +```python +# View errors +errors = FilteredImage.jobs.errors.fetch(as_dict=True) +for err in errors: + print(f"Key: {err['subject_id']}, Error: {err['error_message']}") + +# Reset specific errors after fixing the issue +FilteredImage.jobs.reset('subject_id=42') + +# Reset all errors +FilteredImage.jobs.reset(include_errors=True) +``` + +### Dashboard Queries + +```python +# Get pipeline-wide status +def pipeline_status(schema): + status = {} + for table in schema.list_tables(): + tbl = getattr(schema, table) + if hasattr(tbl, 'jobs'): + status[table] = tbl.jobs.progress() + return status + +# Example output: +# { +# 'FilteredImage': {'pending': 150, 'reserved': 3, 'success': 847, 'error': 12}, +# 'Analysis': {'pending': 500, 'reserved': 0, 'success': 0, 'error': 0}, +# } +``` + +## Backward Compatibility + +### Deprecation Path + +1. **Phase 1 (Current Release)**: + - New jobs tables created alongside existing `~jobs` + - `reserve_jobs=True` uses new system by default + - `reserve_jobs='legacy'` uses old system + - Deprecation warning when using legacy system + +2. **Phase 2 (Next Release)**: + - Legacy `~jobs` table no longer updated + - `reserve_jobs='legacy'` removed + - Migration utility provided + +3. **Phase 3 (Future Release)**: + - Legacy `~jobs` table dropped on schema upgrade + +### API Compatibility + +The `schema.jobs` property will continue to work but return a unified view: + +```python +# Returns all jobs across all tables in the schema +schema.jobs # Deprecated, shows warning + +# Equivalent to: +# SELECT * FROM _table1__jobs UNION SELECT * FROM _table2__jobs ... +``` + +## Future Extensions + +- [ ] Web-based dashboard for job monitoring +- [ ] Webhook notifications for job completion/failure +- [ ] Job dependencies (job B waits for job A) +- [ ] Resource tagging (GPU required, high memory, etc.) +- [ ] Retry policies (max retries, exponential backoff) +- [ ] Job grouping/batching for efficiency +- [ ] Integration with external schedulers (Slurm, PBS, etc.) + +## Rationale + +### Why Not External Orchestration? + +The team considered integrating external tools like Airflow or Flyte but rejected this approach because: + +1. **Deployment complexity**: External orchestrators require significant infrastructure +2. **Maintenance burden**: Additional systems to maintain and monitor +3. **Accessibility**: Not all DataJoint users have access to orchestration platforms +4. **Tight integration**: DataJoint's transaction model requires close coordination + +The built-in jobs system provides 80% of the value with minimal additional complexity. + +### Why Per-Table Jobs? + +Per-table jobs tables provide: + +1. **Better isolation**: Jobs for one table don't affect others +2. **Simpler queries**: No need to filter by table_name +3. **Native keys**: Primary keys are readable, not hashed +4. **Referential integrity**: Automatic cleanup via foreign keys +5. **Scalability**: Each table's jobs can be indexed independently + +### Why Remove Key Hashing? + +The current system hashes primary keys to support arbitrary key types. The new system uses native keys because: + +1. **Readability**: Debugging is much easier with readable keys +2. **Query efficiency**: Native keys can use table indexes +3. **Foreign keys**: Hash-based keys cannot participate in foreign key relationships +4. **Simplicity**: No need for hash computation and comparison From df94fcc3257690b9d87a0e853bbeba6c6d157b0d Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 20:44:01 +0000 Subject: [PATCH 02/39] Add foreign-key-only primary key constraint to spec Auto-populated tables must have primary keys composed entirely of foreign key references. This ensures 1:1 job correspondence and enables proper referential integrity for the jobs table. --- docs/src/design/autopopulate-2.0-spec.md | 94 ++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 6444b607e..477c1438f 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -27,11 +27,54 @@ The existing `~jobs` table has significant limitations: ### Core Design Principles -1. **Per-table jobs**: Each computed table gets its own hidden jobs table -2. **Native primary keys**: Jobs table uses the same primary key structure as its parent table (no hashes) -3. **Referential integrity**: Jobs are foreign-key linked to parent tables with cascading deletes -4. **Rich status tracking**: Extended status values for full lifecycle visibility -5. **Automatic refresh**: `populate()` automatically refreshes the jobs queue +1. **Foreign-key-only primary keys**: Auto-populated tables cannot introduce new primary key attributes; their primary key must comprise only foreign key references +2. **Per-table jobs**: Each computed table gets its own hidden jobs table +3. **Native primary keys**: Jobs table uses the same primary key structure as its parent table (no hashes) +4. **Referential integrity**: Jobs are foreign-key linked to parent tables with cascading deletes +5. **Rich status tracking**: Extended status values for full lifecycle visibility +6. **Automatic refresh**: `populate()` automatically refreshes the jobs queue + +### Primary Key Constraint + +**Auto-populated tables (`dj.Imported` and `dj.Computed`) must have primary keys composed entirely of foreign key references.** + +This constraint ensures: +- **1:1 key_source mapping**: Each entry in `key_source` corresponds to exactly one potential job +- **Deterministic job identity**: A job's identity is fully determined by its parent records +- **Simplified jobs table**: The jobs table can directly reference the same parents as the computed table + +```python +# VALID: Primary key is entirely foreign keys +@schema +class FilteredImage(dj.Computed): + definition = """ + -> Image + --- + filtered_image : + """ + +# VALID: Multiple foreign keys in primary key +@schema +class Comparison(dj.Computed): + definition = """ + -> Image.proj(image_a='image_id') + -> Image.proj(image_b='image_id') + --- + similarity : float + """ + +# INVALID: Additional primary key attribute not allowed +@schema +class Analysis(dj.Computed): + definition = """ + -> Recording + analysis_method : varchar(32) # NOT ALLOWED - adds to primary key + --- + result : float + """ +``` + +**Migration note**: Existing tables that violate this constraint will continue to work but cannot use the new jobs system. A deprecation warning will be issued. ## Architecture @@ -525,3 +568,44 @@ The current system hashes primary keys to support arbitrary key types. The new s 2. **Query efficiency**: Native keys can use table indexes 3. **Foreign keys**: Hash-based keys cannot participate in foreign key relationships 4. **Simplicity**: No need for hash computation and comparison + +### Why Require Foreign-Key-Only Primary Keys? + +Restricting auto-populated tables to foreign-key-only primary keys provides: + +1. **1:1 job correspondence**: Each `key_source` entry maps to exactly one job, eliminating ambiguity about what constitutes a "job" +2. **Proper referential integrity**: The jobs table can reference the same parent tables, enabling cascading deletes +3. **Eliminates key_source complexity**: No need for custom `key_source` definitions to enumerate non-foreign-key combinations +4. **Clearer data model**: The computation graph is fully determined by table dependencies +5. **Simpler populate logic**: No need to handle partial key matching or key enumeration + +**What if I need multiple outputs per parent?** + +Use a part table pattern instead: + +```python +# Instead of adding analysis_method to primary key: +@schema +class Analysis(dj.Computed): + definition = """ + -> Recording + --- + timestamp : datetime + """ + + class Method(dj.Part): + definition = """ + -> master + analysis_method : varchar(32) + --- + result : float + """ + + def make(self, key): + self.insert1(key) + for method in ['pca', 'ica', 'nmf']: + result = run_analysis(key, method) + self.Method.insert1({**key, 'analysis_method': method, 'result': result}) +``` + +This pattern maintains the 1:1 job mapping while supporting multiple outputs per computation. From 91105158b18600dd7fe4ed735d8a3489722a13b7 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 20:56:18 +0000 Subject: [PATCH 03/39] Remove FK constraints from jobs tables for performance - Jobs tables have matching primary key structure but no FK constraints - Stale jobs (from deleted upstream records) handled by refresh() - Added created_time field for stale detection - refresh() now returns {added, removed} counts - Updated rationale sections to reflect performance-focused design --- docs/src/design/autopopulate-2.0-spec.md | 71 ++++++++++++++++-------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 477c1438f..7a92263aa 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -30,9 +30,9 @@ The existing `~jobs` table has significant limitations: 1. **Foreign-key-only primary keys**: Auto-populated tables cannot introduce new primary key attributes; their primary key must comprise only foreign key references 2. **Per-table jobs**: Each computed table gets its own hidden jobs table 3. **Native primary keys**: Jobs table uses the same primary key structure as its parent table (no hashes) -4. **Referential integrity**: Jobs are foreign-key linked to parent tables with cascading deletes +4. **No FK constraints on jobs**: Jobs tables omit foreign key constraints for performance; stale jobs are cleaned by `refresh()` 5. **Rich status tracking**: Extended status values for full lifecycle visibility -6. **Automatic refresh**: `populate()` automatically refreshes the jobs queue +6. **Automatic refresh**: `populate()` automatically refreshes the jobs queue (adding new jobs, removing stale ones) ### Primary Key Constraint @@ -84,12 +84,13 @@ Each `dj.Imported` or `dj.Computed` table `MyTable` will have an associated hidd ``` # Job queue for MyTable --> ParentTable1 --> ParentTable2 -... # Same primary key structure as MyTable +subject_id : int +session_id : int +... # Same primary key attributes as MyTable (NO foreign key constraints) --- status : enum('pending', 'reserved', 'success', 'error', 'ignore') priority : int # Higher priority = processed first (default: 0) +created_time : datetime # When job was added to queue scheduled_time : datetime # Process on or after this time (default: now) reserved_time : datetime # When job was reserved (null if not reserved) completed_time : datetime # When job completed (null if not completed) @@ -103,6 +104,11 @@ connection_id : bigint unsigned # MySQL connection ID version : varchar(255) # Code version (git hash, package version, etc.) ``` +**Important**: The jobs table has the same primary key *structure* as the target table but **no foreign key constraints**. This is intentional for performance: +- Foreign key constraints add overhead on every insert/update/delete +- Jobs tables are high-traffic (frequent reservations and completions) +- Stale jobs (referencing deleted upstream records) are handled by `refresh()` instead + ### Access Pattern Jobs are accessed as a property of the computed table: @@ -166,15 +172,23 @@ class JobsTable(Table): """Dynamically generated based on parent table's primary key.""" ... - def refresh(self, *restrictions) -> int: + def refresh(self, *restrictions, stale_timeout: float = None) -> dict: """ - Refresh the jobs queue by scanning for missing entries. + Refresh the jobs queue: add new jobs and remove stale ones. + + Operations performed: + 1. Add new jobs: (key_source & restrictions) - target - jobs → insert as 'pending' + 2. Remove stale jobs: pending jobs older than stale_timeout whose keys + are no longer in key_source (upstream records were deleted) - Computes: (key_source & restrictions) - target - jobs - Inserts new entries with status='pending'. + Args: + restrictions: Conditions to filter key_source + stale_timeout: Seconds after which pending jobs are checked for staleness. + Jobs older than this are removed if their key is no longer + in key_source. Default from config: jobs.stale_timeout (3600s) Returns: - Number of new jobs added to queue. + {'added': int, 'removed': int} - counts of jobs added and stale jobs removed """ ... @@ -335,9 +349,9 @@ Jobs tables follow the existing hidden table naming pattern: - Table `FilteredImage` (stored as `__filtered_image`) - Jobs table: `~filtered_image__jobs` (stored as `_filtered_image__jobs`) -### Referential Integrity +### Primary Key Matching (No Foreign Keys) -The jobs table references the same parent tables as the computed table: +The jobs table has the same primary key *attributes* as the target table, but **without foreign key constraints**: ```python # If FilteredImage has definition: @@ -349,18 +363,31 @@ class FilteredImage(dj.Computed): filtered_image : """ -# The jobs table will have: -# -> Image (same foreign key reference) -# This ensures cascading deletes work correctly +# The jobs table will have the same primary key (image_id), +# but NO foreign key constraint to Image. +# This is for performance - FK constraints add overhead. ``` -### Cascading Behavior +### Stale Job Handling -When a parent record is deleted: -1. The corresponding computed table record is deleted (existing behavior) -2. The corresponding jobs table record is also deleted (new behavior) +When upstream records are deleted, their corresponding jobs become "stale" (orphaned). Since there are no FK constraints, these jobs remain in the table until cleaned up: + +```python +# refresh() handles stale jobs automatically +result = FilteredImage.jobs.refresh() +# Returns: {'added': 10, 'removed': 3} # 3 stale jobs cleaned up + +# Stale detection logic: +# 1. Find pending jobs where created_time < (now - stale_timeout) +# 2. Check if their keys still exist in key_source +# 3. Remove jobs whose keys no longer exist +``` -This prevents orphaned job records. +**Why not use foreign key cascading deletes?** +- FK constraints add overhead on every insert/update/delete operation +- Jobs tables are high-traffic (frequent reservations and status updates) +- Stale jobs are harmless until refresh—they simply won't match key_source +- The `refresh()` approach is more efficient for batch cleanup ### Migration from Current System @@ -557,7 +584,7 @@ Per-table jobs tables provide: 1. **Better isolation**: Jobs for one table don't affect others 2. **Simpler queries**: No need to filter by table_name 3. **Native keys**: Primary keys are readable, not hashed -4. **Referential integrity**: Automatic cleanup via foreign keys +4. **High performance**: No FK constraints means minimal overhead on job operations 5. **Scalability**: Each table's jobs can be indexed independently ### Why Remove Key Hashing? @@ -574,7 +601,7 @@ The current system hashes primary keys to support arbitrary key types. The new s Restricting auto-populated tables to foreign-key-only primary keys provides: 1. **1:1 job correspondence**: Each `key_source` entry maps to exactly one job, eliminating ambiguity about what constitutes a "job" -2. **Proper referential integrity**: The jobs table can reference the same parent tables, enabling cascading deletes +2. **Matching key structure**: The jobs table primary key exactly matches the target table, enabling efficient stale detection via `key_source` comparison 3. **Eliminates key_source complexity**: No need for custom `key_source` definitions to enumerate non-foreign-key combinations 4. **Clearer data model**: The computation graph is fully determined by table dependencies 5. **Simpler populate logic**: No need to handle partial key matching or key enumeration From 46377084df52da57f7ba5b3c0eee6e8d3cd40029 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 21:08:00 +0000 Subject: [PATCH 04/39] Add table drop/alter behavior and schema.jobs list API - Jobs table automatically dropped when target table is dropped/altered - schema.jobs returns list of JobsTable objects for all auto-populated tables - Updated dashboard examples to use schema.jobs iteration --- docs/src/design/autopopulate-2.0-spec.md | 65 +++++++++++++++++++----- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 7a92263aa..b1faf2661 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -389,6 +389,26 @@ result = FilteredImage.jobs.refresh() - Stale jobs are harmless until refresh—they simply won't match key_source - The `refresh()` approach is more efficient for batch cleanup +### Table Drop and Alter Behavior + +When an auto-populated table is **dropped**, its associated jobs table is automatically dropped: + +```python +# Dropping FilteredImage also drops ~filtered_image__jobs +FilteredImage.drop() +``` + +When an auto-populated table is **altered** (e.g., primary key changes), the jobs table is dropped and can be recreated via `refresh()`: + +```python +# Alter that changes primary key structure +# Jobs table is dropped since its structure no longer matches +FilteredImage.alter() + +# Recreate jobs table with new structure +FilteredImage.jobs.refresh() +``` + ### Migration from Current System The schema-level `~jobs` table will be: @@ -508,20 +528,30 @@ FilteredImage.jobs.reset(include_errors=True) ### Dashboard Queries ```python -# Get pipeline-wide status +# Get pipeline-wide status using schema.jobs def pipeline_status(schema): - status = {} - for table in schema.list_tables(): - tbl = getattr(schema, table) - if hasattr(tbl, 'jobs'): - status[table] = tbl.jobs.progress() - return status + return { + jt.target.table_name: jt.progress() + for jt in schema.jobs + } # Example output: # { # 'FilteredImage': {'pending': 150, 'reserved': 3, 'success': 847, 'error': 12}, # 'Analysis': {'pending': 500, 'reserved': 0, 'success': 0, 'error': 0}, # } + +# Refresh all jobs tables in the schema +for jobs_table in schema.jobs: + jobs_table.refresh() + +# Get all errors across the pipeline +all_errors = [] +for jt in schema.jobs: + errors = jt.errors.fetch(as_dict=True) + for err in errors: + err['_table'] = jt.target.table_name + all_errors.append(err) ``` ## Backward Compatibility @@ -544,16 +574,27 @@ def pipeline_status(schema): ### API Compatibility -The `schema.jobs` property will continue to work but return a unified view: +The `schema.jobs` property returns a list of all jobs table objects for auto-populated tables in the schema: ```python -# Returns all jobs across all tables in the schema -schema.jobs # Deprecated, shows warning +# Returns list of JobsTable objects +schema.jobs +# [FilteredImage.jobs, Analysis.jobs, ...] -# Equivalent to: -# SELECT * FROM _table1__jobs UNION SELECT * FROM _table2__jobs ... +# Iterate over all jobs tables +for jobs_table in schema.jobs: + print(f"{jobs_table.target.table_name}: {jobs_table.progress()}") + +# Query all errors across the schema +all_errors = [job for jt in schema.jobs for job in jt.errors.fetch(as_dict=True)] + +# Refresh all jobs tables +for jobs_table in schema.jobs: + jobs_table.refresh() ``` +This replaces the legacy single `~jobs` table with direct access to per-table jobs. + ## Future Extensions - [ ] Web-based dashboard for job monitoring From 68d876d15cc929c6fb19ac6e95aeb5018fb7efe0 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 21:15:13 +0000 Subject: [PATCH 05/39] Clarify ignore status is manual, not automatic transition - Updated state transition diagram to show only automatic transitions - Added note that ignore is manually set and skipped by populate/refresh - reset() can move ignore jobs back to pending --- docs/src/design/autopopulate-2.0-spec.md | 34 ++++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index b1faf2661..16370d087 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -134,31 +134,31 @@ FilteredImage.jobs.refresh() # Refresh job queue | `reserved` | Job is currently being processed by a worker | | `success` | Job completed successfully | | `error` | Job failed with an error | -| `ignore` | Job should be skipped (manually set) | +| `ignore` | Job should be skipped (manually set, not part of automatic transitions) | ### Status Transitions +Automatic transitions during `populate()`: + ``` - ┌─────────────────────────────────────┐ - │ │ - ▼ │ -┌─────────┐ ┌──────────┐ ┌───────────┐ ┌────────┴──┐ +┌─────────┐ ┌──────────┐ ┌───────────┐ ┌───────────┐ │ (none) │───▶│ pending │───▶│ reserved │───▶│ success │ └─────────┘ └──────────┘ └───────────┘ └───────────┘ - │ │ │ - │ │ │ - │ ▼ ▼ - │ ┌──────────┐ ┌───────────┐ - └────────▶│ ignore │ │ error │───┐ - └──────────┘ └───────────┘ │ - ▲ │ │ - │ ▼ │ - │ ┌──────────┐ │ - └──────────│ pending │◀───┘ - └──────────┘ - (after reset) + refresh() reserve() complete() + │ + │ error() + ▼ + ┌───────────┐ ┌──────────┐ + │ error │───▶│ pending │ + └───────────┘ └──────────┘ + reset() ``` +**Manual status control:** +- `ignore` is set manually via `jobs.ignore(key)` and is not part of automatic transitions +- Jobs with `status='ignore'` are skipped by `populate()` and `refresh()` +- Use `jobs.reset()` to move `ignore` jobs back to `pending` + ## API Design ### JobsTable Class From f0b7cd892a917a79c4cf4fed567080866246a094 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 21:38:25 +0000 Subject: [PATCH 06/39] Simplify job reset mechanism and migration path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major changes: - Remove reset() method; use delete() + refresh() instead - Jobs go from any state → (none) via delete, then → pending via refresh() - Shorten deprecation roadmap: clean break, no legacy support - Jobs tables created lazily on first populate(reserve_jobs=True) - Legacy tables with extra PK attributes: jobs table uses only FK-derived keys --- docs/src/design/autopopulate-2.0-spec.md | 84 +++++++++++++----------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 16370d087..d6623bfaf 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -74,7 +74,7 @@ class Analysis(dj.Computed): """ ``` -**Migration note**: Existing tables that violate this constraint will continue to work but cannot use the new jobs system. A deprecation warning will be issued. +**Legacy table support**: Existing tables that introduce additional primary key attributes (beyond foreign keys) can still use the jobs system, but their jobs table will only include the foreign-key-derived primary key attributes. This means multiple target rows may map to a single job entry. A deprecation warning will be issued for such tables. ## Architecture @@ -148,16 +148,24 @@ Automatic transitions during `populate()`: │ │ error() ▼ + ┌───────────┐ + │ error │ + └───────────┘ + │ + │ delete + ▼ ┌───────────┐ ┌──────────┐ - │ error │───▶│ pending │ + │ (none) │───▶│ pending │ └───────────┘ └──────────┘ - reset() + refresh() ``` +**Resetting jobs:** To reset a job (error or otherwise), simply delete it from the jobs table. The next `refresh()` will re-add it as `pending` if the key is still in `key_source`. + **Manual status control:** - `ignore` is set manually via `jobs.ignore(key)` and is not part of automatic transitions - Jobs with `status='ignore'` are skipped by `populate()` and `refresh()` -- Use `jobs.reset()` to move `ignore` jobs back to `pending` +- To reset an ignored job, delete it and call `refresh()` ## API Design @@ -223,19 +231,22 @@ class JobsTable(Table): def ignore(self, key: dict) -> None: """ Mark a job to be ignored (skipped during populate). + + To reset an ignored job, delete it and call refresh(). """ ... - def reset(self, *restrictions, include_errors: bool = True) -> int: + def delete(self, *restrictions) -> int: """ - Reset jobs to pending status. + Delete jobs matching restrictions. - Args: - restrictions: Conditions to filter which jobs to reset - include_errors: If True, also reset error jobs (default: True) + Deleted jobs return to (none) state. Call refresh() to re-add + them as pending if their keys are still in key_source. + + This is the standard way to "reset" error or ignored jobs. Returns: - Number of jobs reset. + Number of jobs deleted. """ ... @@ -409,18 +420,21 @@ FilteredImage.alter() FilteredImage.jobs.refresh() ``` -### Migration from Current System +### Lazy Table Creation -The schema-level `~jobs` table will be: -1. **Maintained** for backward compatibility during transition -2. **Deprecated** with warnings when `reserve_jobs=True` is used -3. **Migration utility** provided to convert existing jobs to new format +Jobs tables are created automatically on first use: ```python -# Migration utility -schema.migrate_jobs() # Migrates ~jobs entries to per-table jobs tables +# First call to populate with reserve_jobs=True creates the jobs table +FilteredImage.populate(reserve_jobs=True) +# Creates ~filtered_image__jobs if it doesn't exist, then populates + +# Alternatively, explicitly create/refresh the jobs table +FilteredImage.jobs.refresh() ``` +The jobs table is created with the appropriate primary key structure matching the target table's foreign-key-derived attributes. + ### Race Condition Handling Job reservation uses database-level locking to prevent race conditions: @@ -447,7 +461,7 @@ WHERE ; COMMIT; ``` -### Stale Job Detection +### Stale Reserved Job Detection Reserved jobs that have been running too long may indicate crashed workers: @@ -455,8 +469,9 @@ Reserved jobs that have been running too long may indicate crashed workers: # Find potentially stale jobs (reserved > 1 hour ago) stale = MyTable.jobs & 'status="reserved"' & 'reserved_time < NOW() - INTERVAL 1 HOUR' -# Reset stale jobs to pending -MyTable.jobs.reset(stale) +# Delete stale jobs and re-add as pending +stale.delete() +MyTable.jobs.refresh() ``` ## Configuration Options @@ -518,11 +533,14 @@ errors = FilteredImage.jobs.errors.fetch(as_dict=True) for err in errors: print(f"Key: {err['subject_id']}, Error: {err['error_message']}") -# Reset specific errors after fixing the issue -FilteredImage.jobs.reset('subject_id=42') +# Delete specific error jobs after fixing the issue +(FilteredImage.jobs & 'subject_id=42').delete() -# Reset all errors -FilteredImage.jobs.reset(include_errors=True) +# Delete all error jobs +FilteredImage.jobs.errors.delete() + +# Re-add deleted jobs as pending (if keys still in key_source) +FilteredImage.jobs.refresh() ``` ### Dashboard Queries @@ -556,21 +574,13 @@ for jt in schema.jobs: ## Backward Compatibility -### Deprecation Path - -1. **Phase 1 (Current Release)**: - - New jobs tables created alongside existing `~jobs` - - `reserve_jobs=True` uses new system by default - - `reserve_jobs='legacy'` uses old system - - Deprecation warning when using legacy system +### Migration -2. **Phase 2 (Next Release)**: - - Legacy `~jobs` table no longer updated - - `reserve_jobs='legacy'` removed - - Migration utility provided +This is a major release. The legacy schema-level `~jobs` table is replaced by per-table jobs tables: -3. **Phase 3 (Future Release)**: - - Legacy `~jobs` table dropped on schema upgrade +- **Legacy `~jobs` table**: No longer used; can be dropped manually if present +- **New jobs tables**: Created automatically on first `populate(reserve_jobs=True)` call +- **No parallel support**: Teams should migrate cleanly to the new system ### API Compatibility From 6b986ae99e7d880d21b3feb1cc485e00f23e53db Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 21:51:20 +0000 Subject: [PATCH 07/39] Simplify job reservation: no locking, rely on make() transaction - Remove SELECT FOR UPDATE locking from job reservation - Conflicts (rare) resolved by make() transaction's duplicate key error - Second worker catches error and moves to next job - Simpler code, better performance on high-traffic jobs table --- docs/src/design/autopopulate-2.0-spec.md | 44 ++++++++++++++---------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index d6623bfaf..7771d96e0 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -204,11 +204,11 @@ class JobsTable(Table): """ Attempt to reserve a job for processing. - Uses SELECT FOR UPDATE to prevent race conditions. - Only reserves jobs with status='pending' and scheduled_time <= now. + Updates status to 'reserved' if currently 'pending' and scheduled_time <= now. + No locking is used; rare conflicts are resolved by the make() transaction. Returns: - True if reservation successful, False if already taken. + True if reservation successful, False if job not found or not pending. """ ... @@ -435,32 +435,38 @@ FilteredImage.jobs.refresh() The jobs table is created with the appropriate primary key structure matching the target table's foreign-key-derived attributes. -### Race Condition Handling +### Conflict Resolution -Job reservation uses database-level locking to prevent race conditions: +Job reservation does **not** use transaction-level locking for simplicity and performance. Instead, conflicts are resolved at the `make()` transaction level: -```sql --- Reserve a job atomically -START TRANSACTION; -SELECT * FROM `_my_table__jobs` -WHERE status = 'pending' - AND scheduled_time <= NOW() -ORDER BY priority DESC, scheduled_time ASC -LIMIT 1 -FOR UPDATE SKIP LOCKED; - --- If row found, update it +```python +# Simple reservation (no locking) UPDATE `_my_table__jobs` SET status = 'reserved', reserved_time = NOW(), user = CURRENT_USER(), host = @@hostname, pid = CONNECTION_ID() -WHERE ; - -COMMIT; +WHERE status = 'pending' + AND scheduled_time <= NOW() +ORDER BY priority DESC, scheduled_time ASC +LIMIT 1; ``` +**Conflict scenario** (rare): +1. Two workers reserve the same job nearly simultaneously +2. Both run `make()` for the same key +3. First worker's `make()` transaction commits, inserting the result +4. Second worker's `make()` transaction fails with duplicate key error +5. Second worker catches the error and moves to the next job + +**Why this is acceptable**: +- Conflicts are rare in practice (requires near-simultaneous reservation) +- The `make()` transaction already guarantees data integrity +- Duplicate key error is a clean, expected signal +- Avoids locking overhead on the high-traffic jobs table +- Wasted computation is minimal compared to locking complexity + ### Stale Reserved Job Detection Reserved jobs that have been running too long may indicate crashed workers: From 8900fea6d2ee1b47284371f0e0b436f2ef20ef63 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 22:42:47 +0000 Subject: [PATCH 08/39] Clarify per-key reservation flow in populate() Each job is marked as 'reserved' individually before its make() call, matching the current implementation's behavior. --- docs/src/design/autopopulate-2.0-spec.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 7771d96e0..f8d758ae9 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -310,9 +310,12 @@ def populate( New behavior with reserve_jobs=True: 1. If refresh=True, calls self.jobs.refresh(*restrictions) - 2. Fetches jobs from self.jobs where status='pending' and scheduled_time <= now - 3. Reserves and processes jobs using the jobs table - 4. Records success/error status in jobs table + 2. For each pending job (ordered by priority, scheduled_time): + a. Mark job as 'reserved' (per-key, before make) + b. Call make(key) + c. On success: mark job as 'success' + d. On error: mark job as 'error' with message/stack + 3. Continue until no more pending jobs or max_calls reached """ ... ``` From 7c22b6dfda3dec5dd4545ddde69c135e6398203b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:10:35 +0000 Subject: [PATCH 09/39] Update state diagram to Mermaid, consolidate scheduling into refresh() - Replace ASCII diagram with Mermaid stateDiagram - Remove separate schedule() and set_priority() methods - refresh() now handles scheduling via scheduled_time and priority params - Clarify complete() can delete or keep job based on settings --- docs/src/design/autopopulate-2.0-spec.md | 78 ++++++++++++++---------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index f8d758ae9..4acdeea61 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -132,35 +132,30 @@ FilteredImage.jobs.refresh() # Refresh job queue |--------|-------------| | `pending` | Job is queued and ready to be processed | | `reserved` | Job is currently being processed by a worker | -| `success` | Job completed successfully | +| `success` | Job completed successfully (optional, depends on settings) | | `error` | Job failed with an error | | `ignore` | Job should be skipped (manually set, not part of automatic transitions) | ### Status Transitions -Automatic transitions during `populate()`: - -``` -┌─────────┐ ┌──────────┐ ┌───────────┐ ┌───────────┐ -│ (none) │───▶│ pending │───▶│ reserved │───▶│ success │ -└─────────┘ └──────────┘ └───────────┘ └───────────┘ - refresh() reserve() complete() - │ - │ error() - ▼ - ┌───────────┐ - │ error │ - └───────────┘ - │ - │ delete - ▼ - ┌───────────┐ ┌──────────┐ - │ (none) │───▶│ pending │ - └───────────┘ └──────────┘ - refresh() +```mermaid +stateDiagram-v2 + [*] --> pending : refresh() + pending --> reserved : reserve() + reserved --> [*] : complete()\n[if not keeping completed] + reserved --> success : complete()\n[if keeping completed] + reserved --> error : error() + error --> [*] : delete() + success --> [*] : delete() + ignore --> [*] : delete() ``` -**Resetting jobs:** To reset a job (error or otherwise), simply delete it from the jobs table. The next `refresh()` will re-add it as `pending` if the key is still in `key_source`. +**Transition methods:** +- `refresh()` — Adds new jobs as `pending` (from `key_source - target - jobs`) +- `reserve()` — Marks a pending job as `reserved` before calling `make()` +- `complete()` — Marks reserved job as `success`, or deletes it (based on `jobs.keep_completed` setting) +- `error()` — Marks reserved job as `error` with message and stack trace +- `delete()` — Removes job entry, returning it to `(none)` state **Manual status control:** - `ignore` is set manually via `jobs.ignore(key)` and is not part of automatic transitions @@ -180,7 +175,13 @@ class JobsTable(Table): """Dynamically generated based on parent table's primary key.""" ... - def refresh(self, *restrictions, stale_timeout: float = None) -> dict: + def refresh( + self, + *restrictions, + scheduled_time: datetime = None, + priority: int = None, + stale_timeout: float = None + ) -> dict: """ Refresh the jobs queue: add new jobs and remove stale ones. @@ -191,6 +192,10 @@ class JobsTable(Table): Args: restrictions: Conditions to filter key_source + scheduled_time: When new jobs should become available for processing. + Default: now (jobs are immediately available). + Use future times to schedule jobs for later processing. + priority: Priority for new jobs (higher = processed first). Default: 0 stale_timeout: Seconds after which pending jobs are checked for staleness. Jobs older than this are removed if their key is no longer in key_source. Default from config: jobs.stale_timeout (3600s) @@ -342,17 +347,24 @@ MyTable.jobs.progress() # Returns detailed status breakdown ### Priority and Scheduling -```python -# Set priority for specific jobs (higher = processed first) -MyTable.jobs.set_priority(restriction, priority=10) +Priority and scheduling are handled via `refresh()` parameters: -# Schedule jobs for future processing +```python from datetime import datetime, timedelta + +# Add jobs with high priority (higher = processed first) +MyTable.jobs.refresh(priority=10) + +# Schedule jobs for future processing (2 hours from now) future_time = datetime.now() + timedelta(hours=2) -MyTable.jobs.schedule(restriction, scheduled_time=future_time) +MyTable.jobs.refresh(scheduled_time=future_time) + +# Combine: high-priority jobs scheduled for tonight +tonight = datetime.now().replace(hour=22, minute=0, second=0) +MyTable.jobs.refresh(priority=100, scheduled_time=tonight) -# Insert with priority during refresh -MyTable.jobs.refresh(priority=5) # All new jobs get priority=5 +# Add jobs for specific subjects with priority +MyTable.jobs.refresh(Subject & 'priority="urgent"', priority=50) ``` ## Implementation Details @@ -513,9 +525,9 @@ print(FilteredImage.jobs.progress()) ### Priority-Based Processing ```python -# Mark urgent jobs as high priority +# Add urgent jobs with high priority urgent_subjects = Subject & 'priority="urgent"' -FilteredImage.jobs.set_priority(urgent_subjects, priority=100) +FilteredImage.jobs.refresh(urgent_subjects, priority=100) # Workers will process high-priority jobs first FilteredImage.populate(reserve_jobs=True) @@ -528,7 +540,7 @@ FilteredImage.populate(reserve_jobs=True) from datetime import datetime, timedelta tonight = datetime.now().replace(hour=22, minute=0, second=0) -FilteredImage.jobs.schedule('subject_id > 100', scheduled_time=tonight) +FilteredImage.jobs.refresh('subject_id > 100', scheduled_time=tonight) # Only jobs scheduled for now or earlier will be processed FilteredImage.populate(reserve_jobs=True) From 3018b8f42c024979a0b42d3a96ebf9130e96b687 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:17:46 +0000 Subject: [PATCH 10/39] Add (none)->ignore transition, simplify reserve description - ignore() can be called on keys not yet in jobs table - Reserve is done via update1() per key, client provides pid/host/connection_id - Removed specific SQL query from spec --- docs/src/design/autopopulate-2.0-spec.md | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 4acdeea61..f48849bda 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -141,6 +141,7 @@ FilteredImage.jobs.refresh() # Refresh job queue ```mermaid stateDiagram-v2 [*] --> pending : refresh() + [*] --> ignore : ignore() pending --> reserved : reserve() reserved --> [*] : complete()\n[if not keeping completed] reserved --> success : complete()\n[if keeping completed] @@ -152,6 +153,7 @@ stateDiagram-v2 **Transition methods:** - `refresh()` — Adds new jobs as `pending` (from `key_source - target - jobs`) +- `ignore()` — Marks a key as `ignore` (can be called on keys not yet in jobs table) - `reserve()` — Marks a pending job as `reserved` before calling `make()` - `complete()` — Marks reserved job as `success`, or deletes it (based on `jobs.keep_completed` setting) - `error()` — Marks reserved job as `error` with message and stack trace @@ -452,21 +454,7 @@ The jobs table is created with the appropriate primary key structure matching th ### Conflict Resolution -Job reservation does **not** use transaction-level locking for simplicity and performance. Instead, conflicts are resolved at the `make()` transaction level: - -```python -# Simple reservation (no locking) -UPDATE `_my_table__jobs` -SET status = 'reserved', - reserved_time = NOW(), - user = CURRENT_USER(), - host = @@hostname, - pid = CONNECTION_ID() -WHERE status = 'pending' - AND scheduled_time <= NOW() -ORDER BY priority DESC, scheduled_time ASC -LIMIT 1; -``` +Job reservation is performed via `update1()` for each key individually before calling `make()`. The client provides its own `pid`, `host`, and `connection_id` information. No transaction-level locking is used. **Conflict scenario** (rare): 1. Two workers reserve the same job nearly simultaneously From 7eda583535919b6becb2cc7a7b50ef24130aefd4 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:19:59 +0000 Subject: [PATCH 11/39] Add success->pending transition via refresh() If a success job's key is still in key_source but the target entry was deleted, refresh() will transition it back to pending. --- docs/src/design/autopopulate-2.0-spec.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index f48849bda..4e24e111d 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -146,13 +146,14 @@ stateDiagram-v2 reserved --> [*] : complete()\n[if not keeping completed] reserved --> success : complete()\n[if keeping completed] reserved --> error : error() + success --> pending : refresh()\n[if key in key_source] error --> [*] : delete() success --> [*] : delete() ignore --> [*] : delete() ``` **Transition methods:** -- `refresh()` — Adds new jobs as `pending` (from `key_source - target - jobs`) +- `refresh()` — Adds new jobs as `pending`; also re-pends `success` jobs if key is in `key_source` but not in target - `ignore()` — Marks a key as `ignore` (can be called on keys not yet in jobs table) - `reserve()` — Marks a pending job as `reserved` before calling `make()` - `complete()` — Marks reserved job as `success`, or deletes it (based on `jobs.keep_completed` setting) From bab7e1010a403cb0152dfcd88db11cd1ac21c875 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:21:37 +0000 Subject: [PATCH 12/39] Use explicit (none) state in Mermaid diagram Replaces multiple [*] start/end states with a single explicit "(none)" state for clarity. --- docs/src/design/autopopulate-2.0-spec.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 4e24e111d..e42accbff 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -140,16 +140,17 @@ FilteredImage.jobs.refresh() # Refresh job queue ```mermaid stateDiagram-v2 - [*] --> pending : refresh() - [*] --> ignore : ignore() + state "(none)" as none + none --> pending : refresh() + none --> ignore : ignore() pending --> reserved : reserve() - reserved --> [*] : complete()\n[if not keeping completed] + reserved --> none : complete()\n[if not keeping completed] reserved --> success : complete()\n[if keeping completed] reserved --> error : error() success --> pending : refresh()\n[if key in key_source] - error --> [*] : delete() - success --> [*] : delete() - ignore --> [*] : delete() + error --> none : delete() + success --> none : delete() + ignore --> none : delete() ``` **Transition methods:** From 586effa8a18cbf55e4d7e0c2e24b9e2913af4afd Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:27:30 +0000 Subject: [PATCH 13/39] Simplify diagram notation, remove clear_completed() - Use complete() and complete()* notation for conditional transitions - Same for refresh() and refresh()* - Remove clear_completed(); use (jobs & 'status="success"').delete() instead - Note that delete() requires no confirmation (low-cost operation) --- docs/src/design/autopopulate-2.0-spec.md | 32 ++++++++++-------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index e42accbff..401657a2f 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -144,22 +144,26 @@ stateDiagram-v2 none --> pending : refresh() none --> ignore : ignore() pending --> reserved : reserve() - reserved --> none : complete()\n[if not keeping completed] - reserved --> success : complete()\n[if keeping completed] + reserved --> none : complete() + reserved --> success : complete()* reserved --> error : error() - success --> pending : refresh()\n[if key in key_source] + success --> pending : refresh()* error --> none : delete() success --> none : delete() ignore --> none : delete() ``` +- `complete()` deletes the job entry (default when `jobs.keep_completed=False`) +- `complete()*` keeps the job as `success` (when `jobs.keep_completed=True`) +- `refresh()*` re-pends a `success` job if its key is in `key_source` but not in target + **Transition methods:** - `refresh()` — Adds new jobs as `pending`; also re-pends `success` jobs if key is in `key_source` but not in target - `ignore()` — Marks a key as `ignore` (can be called on keys not yet in jobs table) - `reserve()` — Marks a pending job as `reserved` before calling `make()` - `complete()` — Marks reserved job as `success`, or deletes it (based on `jobs.keep_completed` setting) - `error()` — Marks reserved job as `error` with message and stack trace -- `delete()` — Removes job entry, returning it to `(none)` state +- `delete()` — Removes job entries without confirmation (low-cost operation) **Manual status control:** - `ignore` is set manually via `jobs.ignore(key)` and is not part of automatic transitions @@ -247,31 +251,21 @@ class JobsTable(Table): def delete(self, *restrictions) -> int: """ - Delete jobs matching restrictions. + Delete jobs matching restrictions. No confirmation required. Deleted jobs return to (none) state. Call refresh() to re-add them as pending if their keys are still in key_source. - This is the standard way to "reset" error or ignored jobs. + Examples: + jobs.errors.delete() # Delete all error jobs + (jobs & 'status="success"').delete() # Delete completed jobs + (jobs & 'subject_id=42').delete() # Delete jobs for specific key Returns: Number of jobs deleted. """ ... - def clear_completed(self, *restrictions, before: datetime = None) -> int: - """ - Remove completed jobs from the queue. - - Args: - restrictions: Conditions to filter which jobs to clear - before: Only clear jobs completed before this time - - Returns: - Number of jobs cleared. - """ - ... - @property def pending(self) -> QueryExpression: """Return query for pending jobs.""" From 5b1e3e8c796ad6b05453432e495e64494bbec43a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:39:29 +0000 Subject: [PATCH 14/39] Refine jobs spec: priority, delete, populate logic - Priority: lower = more urgent (0 = highest), default = 5 - Acyclic state diagram with dual (none) states - delete() inherited from delete_quick(), use (jobs & cond).delete() - Added 'ignored' property for consistency - populate() logic: fetch pending first, only refresh if no pending found - Updated all examples to reflect new priority semantics --- docs/src/design/autopopulate-2.0-spec.md | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 401657a2f..757fa34af 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -89,7 +89,7 @@ session_id : int ... # Same primary key attributes as MyTable (NO foreign key constraints) --- status : enum('pending', 'reserved', 'success', 'error', 'ignore') -priority : int # Higher priority = processed first (default: 0) +priority : int # Lower = more urgent (0 = highest priority, default: 5) created_time : datetime # When job was added to queue scheduled_time : datetime # Process on or after this time (default: now) reserved_time : datetime # When job was reserved (null if not reserved) @@ -140,17 +140,18 @@ FilteredImage.jobs.refresh() # Refresh job queue ```mermaid stateDiagram-v2 - state "(none)" as none - none --> pending : refresh() - none --> ignore : ignore() + state "(none)" as none1 + state "(none)" as none2 + none1 --> pending : refresh() + none1 --> ignore : ignore() pending --> reserved : reserve() - reserved --> none : complete() + reserved --> none2 : complete() reserved --> success : complete()* reserved --> error : error() success --> pending : refresh()* - error --> none : delete() - success --> none : delete() - ignore --> none : delete() + error --> none2 : delete() + success --> none2 : delete() + ignore --> none2 : delete() ``` - `complete()` deletes the job entry (default when `jobs.keep_completed=False`) @@ -163,12 +164,12 @@ stateDiagram-v2 - `reserve()` — Marks a pending job as `reserved` before calling `make()` - `complete()` — Marks reserved job as `success`, or deletes it (based on `jobs.keep_completed` setting) - `error()` — Marks reserved job as `error` with message and stack trace -- `delete()` — Removes job entries without confirmation (low-cost operation) +- `delete()` — Inherited from `delete_quick()`; use `(jobs & condition).delete()` pattern **Manual status control:** - `ignore` is set manually via `jobs.ignore(key)` and is not part of automatic transitions - Jobs with `status='ignore'` are skipped by `populate()` and `refresh()` -- To reset an ignored job, delete it and call `refresh()` +- To reset an ignored job, delete it and call `refresh()`: `jobs.ignored.delete(); jobs.refresh()` ## API Design @@ -187,7 +188,7 @@ class JobsTable(Table): self, *restrictions, scheduled_time: datetime = None, - priority: int = None, + priority: int = 5, stale_timeout: float = None ) -> dict: """ @@ -203,7 +204,7 @@ class JobsTable(Table): scheduled_time: When new jobs should become available for processing. Default: now (jobs are immediately available). Use future times to schedule jobs for later processing. - priority: Priority for new jobs (higher = processed first). Default: 0 + priority: Priority for new jobs (lower = more urgent). Default: 5 stale_timeout: Seconds after which pending jobs are checked for staleness. Jobs older than this are removed if their key is no longer in key_source. Default from config: jobs.stale_timeout (3600s) @@ -249,22 +250,8 @@ class JobsTable(Table): """ ... - def delete(self, *restrictions) -> int: - """ - Delete jobs matching restrictions. No confirmation required. - - Deleted jobs return to (none) state. Call refresh() to re-add - them as pending if their keys are still in key_source. - - Examples: - jobs.errors.delete() # Delete all error jobs - (jobs & 'status="success"').delete() # Delete completed jobs - (jobs & 'subject_id=42').delete() # Delete jobs for specific key - - Returns: - Number of jobs deleted. - """ - ... + # delete() is inherited from delete_quick() - no confirmation required + # Usage: (jobs & condition).delete() or jobs.errors.delete() @property def pending(self) -> QueryExpression: @@ -281,6 +268,11 @@ class JobsTable(Table): """Return query for error jobs.""" return self & 'status="error"' + @property + def ignored(self) -> QueryExpression: + """Return query for ignored jobs.""" + return self & 'status="ignore"' + @property def completed(self) -> QueryExpression: """Return query for completed jobs.""" @@ -305,20 +297,22 @@ def populate( processes: int = 1, make_kwargs: dict = None, # New parameters - priority: int = None, # Only process jobs with this priority or higher - refresh: bool = True, # Refresh jobs queue before populating + priority: int = None, # Only process jobs at this priority or more urgent (lower values) + refresh: bool = True, # Refresh jobs queue if no pending jobs available ) -> dict: """ Populate the table by calling make() for each missing entry. New behavior with reserve_jobs=True: - 1. If refresh=True, calls self.jobs.refresh(*restrictions) - 2. For each pending job (ordered by priority, scheduled_time): + 1. Fetch all non-stale pending jobs (ordered by priority ASC, scheduled_time ASC) + 2. For each pending job: a. Mark job as 'reserved' (per-key, before make) b. Call make(key) - c. On success: mark job as 'success' + c. On success: mark job as 'success' or delete (based on keep_completed) d. On error: mark job as 'error' with message/stack - 3. Continue until no more pending jobs or max_calls reached + 3. If refresh=True and no pending jobs were found, call self.jobs.refresh() + and repeat from step 1 + 4. Continue until no more pending jobs or max_calls reached """ ... ``` @@ -345,24 +339,30 @@ MyTable.jobs.progress() # Returns detailed status breakdown ### Priority and Scheduling -Priority and scheduling are handled via `refresh()` parameters: +Priority and scheduling are handled via `refresh()` parameters. Lower priority values are more urgent (0 = highest priority). ```python from datetime import datetime, timedelta -# Add jobs with high priority (higher = processed first) +# Add urgent jobs (priority=0 is most urgent) +MyTable.jobs.refresh(priority=0) + +# Add normal jobs (default priority=5) +MyTable.jobs.refresh() + +# Add low-priority background jobs MyTable.jobs.refresh(priority=10) # Schedule jobs for future processing (2 hours from now) future_time = datetime.now() + timedelta(hours=2) MyTable.jobs.refresh(scheduled_time=future_time) -# Combine: high-priority jobs scheduled for tonight +# Combine: urgent jobs scheduled for tonight tonight = datetime.now().replace(hour=22, minute=0, second=0) -MyTable.jobs.refresh(priority=100, scheduled_time=tonight) +MyTable.jobs.refresh(priority=0, scheduled_time=tonight) -# Add jobs for specific subjects with priority -MyTable.jobs.refresh(Subject & 'priority="urgent"', priority=50) +# Add urgent jobs for specific subjects +MyTable.jobs.refresh(Subject & 'priority="urgent"', priority=0) ``` ## Implementation Details @@ -487,8 +487,8 @@ New configuration settings for job management: # In datajoint config dj.config['jobs.auto_refresh'] = True # Auto-refresh on populate (default: True) dj.config['jobs.keep_completed'] = False # Keep success records (default: False) -dj.config['jobs.stale_timeout'] = 3600 # Seconds before reserved job is stale (default: 3600) -dj.config['jobs.default_priority'] = 0 # Default priority for new jobs (default: 0) +dj.config['jobs.stale_timeout'] = 3600 # Seconds before pending job is considered stale (default: 3600) +dj.config['jobs.default_priority'] = 5 # Default priority for new jobs (lower = more urgent) ``` ## Usage Examples @@ -509,11 +509,11 @@ print(FilteredImage.jobs.progress()) ### Priority-Based Processing ```python -# Add urgent jobs with high priority +# Add urgent jobs (priority=0 is most urgent) urgent_subjects = Subject & 'priority="urgent"' -FilteredImage.jobs.refresh(urgent_subjects, priority=100) +FilteredImage.jobs.refresh(urgent_subjects, priority=0) -# Workers will process high-priority jobs first +# Workers will process lowest-priority-value jobs first FilteredImage.populate(reserve_jobs=True) ``` From 2e0a3d92cd69ac36f4c77e35c80b117eddebc9df Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 23:55:29 +0000 Subject: [PATCH 15/39] Clarify stale vs orphaned job terminology - Add Terminology section defining stale (pending jobs with deleted upstream) and orphaned (reserved jobs from crashed processes) - Rename "Stale Reserved Job Detection" to "Orphaned Job Handling" - Clarify that orphaned job detection is orchestration-dependent (no algorithmic method) - Update stale job handling section for consistency --- docs/src/design/autopopulate-2.0-spec.md | 31 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 757fa34af..37216d87b 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -25,6 +25,11 @@ The existing `~jobs` table has significant limitations: ## Proposed Solution +### Terminology + +- **Stale job**: A pending job whose upstream records have been deleted. The job references keys that no longer exist in `key_source`. Stale jobs are automatically cleaned up by `refresh()`. +- **Orphaned job**: A reserved job from a crashed or terminated process. The worker that reserved the job is no longer running, but the job remains in `reserved` status. Orphaned jobs must be cleared manually (see below). + ### Core Design Principles 1. **Foreign-key-only primary keys**: Auto-populated tables cannot introduce new primary key attributes; their primary key must comprise only foreign key references @@ -394,7 +399,7 @@ class FilteredImage(dj.Computed): ### Stale Job Handling -When upstream records are deleted, their corresponding jobs become "stale" (orphaned). Since there are no FK constraints, these jobs remain in the table until cleaned up: +Stale jobs are pending jobs whose upstream records have been deleted. Since there are no FK constraints on jobs tables, these jobs remain until cleaned up by `refresh()`: ```python # refresh() handles stale jobs automatically @@ -404,7 +409,7 @@ result = FilteredImage.jobs.refresh() # Stale detection logic: # 1. Find pending jobs where created_time < (now - stale_timeout) # 2. Check if their keys still exist in key_source -# 3. Remove jobs whose keys no longer exist +# 3. Remove pending jobs whose keys no longer exist ``` **Why not use foreign key cascading deletes?** @@ -466,19 +471,29 @@ Job reservation is performed via `update1()` for each key individually before ca - Avoids locking overhead on the high-traffic jobs table - Wasted computation is minimal compared to locking complexity -### Stale Reserved Job Detection +### Orphaned Job Handling + +Orphaned jobs are reserved jobs from crashed or terminated processes. The API does not provide an algorithmic method for detecting or clearing orphaned jobs because this is dependent on the orchestration system (e.g., Slurm job IDs, Kubernetes pod status, process heartbeats). -Reserved jobs that have been running too long may indicate crashed workers: +Users must manually clear orphaned jobs using the `delete()` method: ```python -# Find potentially stale jobs (reserved > 1 hour ago) -stale = MyTable.jobs & 'status="reserved"' & 'reserved_time < NOW() - INTERVAL 1 HOUR' +# Delete all reserved jobs (use with caution - may kill active jobs!) +MyTable.jobs.reserved.delete() -# Delete stale jobs and re-add as pending -stale.delete() +# Delete reserved jobs from a specific host that crashed +(MyTable.jobs.reserved & 'host="crashed-node"').delete() + +# Delete reserved jobs older than 1 hour (likely orphaned) +(MyTable.jobs.reserved & 'reserved_time < NOW() - INTERVAL 1 HOUR').delete() + +# Delete and re-add as pending +MyTable.jobs.reserved.delete() MyTable.jobs.refresh() ``` +**Important**: Be careful when deleting reserved jobs—you may accidentally terminate jobs that are still running. Coordinate with your orchestration system to identify truly orphaned jobs. + ## Configuration Options New configuration settings for job management: From 77c7cf5e3c31ae4459f0122e908d1efdf82d71ae Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:00:24 +0000 Subject: [PATCH 16/39] Remove FK-only PK requirement, add hazard analysis - Remove requirement that auto-populated tables have FK-only primary keys (this constraint is handled elsewhere, not by the jobs system) - Clarify that jobs table PK includes only FK-derived attributes from the target table's primary key - Add example showing how additional PK attributes are excluded - Add comprehensive Hazard Analysis section covering: - Race conditions (reservation, refresh, completion) - State transitions (invalid, stuck, ignored) - Data integrity (stale jobs, sync, transactions) - Performance (table size, refresh speed) - Operational (accidental deletion, priority) - Migration (legacy table, version mixing) --- docs/src/design/autopopulate-2.0-spec.md | 186 +++++++++++------------ 1 file changed, 88 insertions(+), 98 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 37216d87b..bc5770207 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -32,54 +32,11 @@ The existing `~jobs` table has significant limitations: ### Core Design Principles -1. **Foreign-key-only primary keys**: Auto-populated tables cannot introduce new primary key attributes; their primary key must comprise only foreign key references -2. **Per-table jobs**: Each computed table gets its own hidden jobs table -3. **Native primary keys**: Jobs table uses the same primary key structure as its parent table (no hashes) -4. **No FK constraints on jobs**: Jobs tables omit foreign key constraints for performance; stale jobs are cleaned by `refresh()` -5. **Rich status tracking**: Extended status values for full lifecycle visibility -6. **Automatic refresh**: `populate()` automatically refreshes the jobs queue (adding new jobs, removing stale ones) - -### Primary Key Constraint - -**Auto-populated tables (`dj.Imported` and `dj.Computed`) must have primary keys composed entirely of foreign key references.** - -This constraint ensures: -- **1:1 key_source mapping**: Each entry in `key_source` corresponds to exactly one potential job -- **Deterministic job identity**: A job's identity is fully determined by its parent records -- **Simplified jobs table**: The jobs table can directly reference the same parents as the computed table - -```python -# VALID: Primary key is entirely foreign keys -@schema -class FilteredImage(dj.Computed): - definition = """ - -> Image - --- - filtered_image : - """ - -# VALID: Multiple foreign keys in primary key -@schema -class Comparison(dj.Computed): - definition = """ - -> Image.proj(image_a='image_id') - -> Image.proj(image_b='image_id') - --- - similarity : float - """ - -# INVALID: Additional primary key attribute not allowed -@schema -class Analysis(dj.Computed): - definition = """ - -> Recording - analysis_method : varchar(32) # NOT ALLOWED - adds to primary key - --- - result : float - """ -``` - -**Legacy table support**: Existing tables that introduce additional primary key attributes (beyond foreign keys) can still use the jobs system, but their jobs table will only include the foreign-key-derived primary key attributes. This means multiple target rows may map to a single job entry. A deprecation warning will be issued for such tables. +1. **Per-table jobs**: Each computed table gets its own hidden jobs table +2. **FK-derived primary keys**: Jobs table primary key includes only attributes derived from foreign keys in the target table's primary key (not additional primary key attributes) +3. **No FK constraints on jobs**: Jobs tables omit foreign key constraints for performance; stale jobs are cleaned by `refresh()` +4. **Rich status tracking**: Extended status values for full lifecycle visibility +5. **Automatic refresh**: `populate()` automatically refreshes the jobs queue (adding new jobs, removing stale ones) ## Architecture @@ -91,7 +48,7 @@ Each `dj.Imported` or `dj.Computed` table `MyTable` will have an associated hidd # Job queue for MyTable subject_id : int session_id : int -... # Same primary key attributes as MyTable (NO foreign key constraints) +... # Only FK-derived primary key attributes (NO foreign key constraints) --- status : enum('pending', 'reserved', 'success', 'error', 'ignore') priority : int # Lower = more urgent (0 = highest priority, default: 5) @@ -109,10 +66,10 @@ connection_id : bigint unsigned # MySQL connection ID version : varchar(255) # Code version (git hash, package version, etc.) ``` -**Important**: The jobs table has the same primary key *structure* as the target table but **no foreign key constraints**. This is intentional for performance: -- Foreign key constraints add overhead on every insert/update/delete -- Jobs tables are high-traffic (frequent reservations and completions) -- Stale jobs (referencing deleted upstream records) are handled by `refresh()` instead +**Important**: The jobs table primary key includes only those attributes that come through foreign keys in the target table's primary key. Additional primary key attributes (if any) are excluded. This means: +- If a target table has primary key `(-> Subject, -> Session, method)`, the jobs table has primary key `(subject_id, session_id)` only +- Multiple target rows may map to a single job entry when additional PK attributes exist +- Jobs tables have **no foreign key constraints** for performance (stale jobs handled by `refresh()`) ### Access Pattern @@ -378,12 +335,12 @@ Jobs tables follow the existing hidden table naming pattern: - Table `FilteredImage` (stored as `__filtered_image`) - Jobs table: `~filtered_image__jobs` (stored as `_filtered_image__jobs`) -### Primary Key Matching (No Foreign Keys) +### Primary Key Derivation -The jobs table has the same primary key *attributes* as the target table, but **without foreign key constraints**: +The jobs table primary key includes only those attributes derived from foreign keys in the target table's primary key: ```python -# If FilteredImage has definition: +# Example 1: FK-only primary key (simple case) @schema class FilteredImage(dj.Computed): definition = """ @@ -391,12 +348,23 @@ class FilteredImage(dj.Computed): --- filtered_image : """ +# Jobs table primary key: (image_id) — same as target -# The jobs table will have the same primary key (image_id), -# but NO foreign key constraint to Image. -# This is for performance - FK constraints add overhead. +# Example 2: Target with additional PK attribute +@schema +class Analysis(dj.Computed): + definition = """ + -> Recording + analysis_method : varchar(32) # Additional PK attribute + --- + result : float + """ +# Jobs table primary key: (recording_id) — excludes 'analysis_method' +# One job entry covers all analysis_method values for a given recording ``` +The jobs table has **no foreign key constraints** for performance reasons. + ### Stale Job Handling Stale jobs are pending jobs whose upstream records have been deleted. Since there are no FK constraints on jobs tables, these jobs remain until cleaned up by `refresh()`: @@ -451,7 +419,7 @@ FilteredImage.populate(reserve_jobs=True) FilteredImage.jobs.refresh() ``` -The jobs table is created with the appropriate primary key structure matching the target table's foreign-key-derived attributes. +The jobs table is created with a primary key derived from the target table's foreign key attributes. ### Conflict Resolution @@ -625,6 +593,61 @@ for jobs_table in schema.jobs: This replaces the legacy single `~jobs` table with direct access to per-table jobs. +## Hazard Analysis + +This section identifies potential hazards and their mitigations. + +### Race Conditions + +| Hazard | Description | Mitigation | +|--------|-------------|------------| +| **Simultaneous reservation** | Two workers reserve the same pending job at nearly the same time | Acceptable: duplicate `make()` calls are resolved by transaction—second worker gets duplicate key error | +| **Reserve during refresh** | Worker reserves a job while another process is running `refresh()` | No conflict: `refresh()` adds new jobs and removes stale ones; reservation updates existing rows | +| **Concurrent refresh calls** | Multiple processes call `refresh()` simultaneously | Acceptable: may result in duplicate insert attempts, but primary key constraint prevents duplicates | +| **Complete vs delete race** | One process completes a job while another deletes it | Acceptable: one operation succeeds, other becomes no-op (row not found) | + +### State Transitions + +| Hazard | Description | Mitigation | +|--------|-------------|------------| +| **Invalid state transition** | Code attempts illegal transition (e.g., pending → success) | Implementation enforces valid transitions; invalid attempts raise error | +| **Stuck in reserved** | Worker crashes while job is reserved (orphaned job) | Manual intervention required: `jobs.reserved.delete()` (see Orphaned Job Handling) | +| **Success re-pended unexpectedly** | `refresh()` re-pends a success job when user expected it to stay | Only occurs if `keep_completed=True` AND key exists in `key_source` but not in target; document clearly | +| **Ignore not respected** | Ignored jobs get processed anyway | Implementation must skip `status='ignore'` in `populate()` job fetching | + +### Data Integrity + +| Hazard | Description | Mitigation | +|--------|-------------|------------| +| **Stale job processed** | Job references deleted upstream data | `make()` will fail or produce invalid results; `refresh()` cleans stale jobs before processing | +| **Jobs table out of sync** | Jobs table doesn't match `key_source` | `refresh()` synchronizes; call periodically or rely on `populate(refresh=True)` | +| **Partial make failure** | `make()` partially succeeds then fails | DataJoint transaction rollback ensures atomicity; job marked as error | +| **Error message truncation** | Error details exceed `varchar(2047)` | Full stack stored in `error_stack` (mediumblob); `error_message` is summary only | + +### Performance + +| Hazard | Description | Mitigation | +|--------|-------------|------------| +| **Large jobs table** | Jobs table grows very large with `keep_completed=True` | Default is `keep_completed=False`; provide guidance on periodic cleanup | +| **Slow refresh on large key_source** | `refresh()` queries entire `key_source` | Can restrict refresh to subsets: `jobs.refresh(Subject & 'lab="smith"')` | +| **Many jobs tables per schema** | Schema with many computed tables has many jobs tables | Jobs tables are lightweight; only created on first use | + +### Operational + +| Hazard | Description | Mitigation | +|--------|-------------|------------| +| **Accidental job deletion** | User runs `jobs.delete()` without restriction | `delete()` inherits from `delete_quick()` (no confirmation); users must apply restrictions carefully | +| **Clearing active jobs** | User clears reserved jobs while workers are running | Document warning in Orphaned Job Handling; recommend coordinating with orchestrator | +| **Priority confusion** | User expects higher number = higher priority | Document clearly: lower values are more urgent (0 = highest priority) | + +### Migration + +| Hazard | Description | Mitigation | +|--------|-------------|------------| +| **Legacy ~jobs table conflict** | Old `~jobs` table exists alongside new per-table jobs | Systems are independent; legacy table can be dropped manually | +| **Mixed version workers** | Some workers use old system, some use new | Major release; do not support mixed operation—require full migration | +| **Lost error history** | Migrating loses error records from legacy table | Document migration procedure; users can export legacy errors before migration | + ## Future Extensions - [ ] Web-based dashboard for job monitoring @@ -667,43 +690,10 @@ The current system hashes primary keys to support arbitrary key types. The new s 3. **Foreign keys**: Hash-based keys cannot participate in foreign key relationships 4. **Simplicity**: No need for hash computation and comparison -### Why Require Foreign-Key-Only Primary Keys? - -Restricting auto-populated tables to foreign-key-only primary keys provides: - -1. **1:1 job correspondence**: Each `key_source` entry maps to exactly one job, eliminating ambiguity about what constitutes a "job" -2. **Matching key structure**: The jobs table primary key exactly matches the target table, enabling efficient stale detection via `key_source` comparison -3. **Eliminates key_source complexity**: No need for custom `key_source` definitions to enumerate non-foreign-key combinations -4. **Clearer data model**: The computation graph is fully determined by table dependencies -5. **Simpler populate logic**: No need to handle partial key matching or key enumeration - -**What if I need multiple outputs per parent?** - -Use a part table pattern instead: - -```python -# Instead of adding analysis_method to primary key: -@schema -class Analysis(dj.Computed): - definition = """ - -> Recording - --- - timestamp : datetime - """ - - class Method(dj.Part): - definition = """ - -> master - analysis_method : varchar(32) - --- - result : float - """ +### Why FK-Derived Primary Keys Only? - def make(self, key): - self.insert1(key) - for method in ['pca', 'ica', 'nmf']: - result = run_analysis(key, method) - self.Method.insert1({**key, 'analysis_method': method, 'result': result}) -``` +The jobs table primary key includes only attributes derived from foreign keys in the target table's primary key. This design: -This pattern maintains the 1:1 job mapping while supporting multiple outputs per computation. +1. **Aligns with key_source**: The `key_source` query naturally produces keys matching the FK-derived attributes +2. **Simplifies job identity**: A job's identity is determined by its upstream dependencies +3. **Handles additional PK attributes**: When targets have additional PK attributes (e.g., `method`), one job covers all values for that attribute From 86e21f41d887a9d5ed399dd5094b6fda1e599797 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:05:35 +0000 Subject: [PATCH 17/39] Clarify conflict resolution and add pre-partitioning pattern - Clarify that transaction-based conflict resolution applies regardless of reserve_jobs setting (True or False) - Add new section "Job Reservation vs Pre-Partitioning" documenting the alternative workflow where orchestrators explicitly divide jobs before distributing to workers - Include comparison table for when to use each approach --- docs/src/design/autopopulate-2.0-spec.md | 49 +++++++++++++++++++----- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index bc5770207..4871fc275 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -423,22 +423,51 @@ The jobs table is created with a primary key derived from the target table's for ### Conflict Resolution -Job reservation is performed via `update1()` for each key individually before calling `make()`. The client provides its own `pid`, `host`, and `connection_id` information. No transaction-level locking is used. +Conflict resolution relies on the transaction surrounding each `make()` call. This applies regardless of whether `reserve_jobs=True` or `reserve_jobs=False`: -**Conflict scenario** (rare): -1. Two workers reserve the same job nearly simultaneously -2. Both run `make()` for the same key -3. First worker's `make()` transaction commits, inserting the result -4. Second worker's `make()` transaction fails with duplicate key error -5. Second worker catches the error and moves to the next job +- With `reserve_jobs=False`: Workers query `key_source` directly and may attempt the same key +- With `reserve_jobs=True`: Job reservation reduces conflicts but doesn't eliminate them entirely + +When two workers attempt to populate the same key: +1. Both call `make()` for the same key +2. First worker's `make()` transaction commits, inserting the result +3. Second worker's `make()` transaction fails with duplicate key error +4. Second worker catches the error and moves to the next job **Why this is acceptable**: -- Conflicts are rare in practice (requires near-simultaneous reservation) -- The `make()` transaction already guarantees data integrity +- The `make()` transaction guarantees data integrity - Duplicate key error is a clean, expected signal -- Avoids locking overhead on the high-traffic jobs table +- With `reserve_jobs=True`, conflicts are rare (requires near-simultaneous reservation) - Wasted computation is minimal compared to locking complexity +### Job Reservation vs Pre-Partitioning + +The job reservation mechanism (`reserve_jobs=True`) allows workers to dynamically claim jobs from a shared queue. However, some orchestration systems may prefer to **pre-partition** jobs before distributing them to workers: + +```python +# Pre-partitioning example: orchestrator divides work explicitly +all_pending = FilteredImage.jobs.pending.fetch("KEY") + +# Split jobs among workers (e.g., by worker index) +n_workers = 4 +for worker_id in range(n_workers): + worker_jobs = all_pending[worker_id::n_workers] # Round-robin assignment + # Send worker_jobs to worker via orchestration system (Slurm, K8s, etc.) + +# Worker receives its assigned keys and processes them directly +for key in assigned_keys: + FilteredImage.populate(key, reserve_jobs=False) +``` + +**When to use each approach**: + +| Approach | Use Case | +|----------|----------| +| **Dynamic reservation** (`reserve_jobs=True`) | Simple setups, variable job durations, workers that start/stop dynamically | +| **Pre-partitioning** | Batch schedulers (Slurm, PBS), predictable job counts, avoiding reservation overhead | + +Both approaches benefit from the same transaction-based conflict resolution as a safety net. + ### Orphaned Job Handling Orphaned jobs are reserved jobs from crashed or terminated processes. The API does not provide an algorithmic method for detecting or clearing orphaned jobs because this is dependent on the orchestration system (e.g., Slurm job IDs, Kubernetes pod status, process heartbeats). From 314ad0acb546dd9aa5bc313e942f2b3193392690 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:09:55 +0000 Subject: [PATCH 18/39] Fix incorrect statement about deleting reserved jobs Deleting a reserved job does not terminate the running worker - it only removes the reservation record. The worker continues its make() call. The actual risk is duplicated work if the job is refreshed and picked up by another worker. --- docs/src/design/autopopulate-2.0-spec.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 4871fc275..300c628cd 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -489,7 +489,7 @@ MyTable.jobs.reserved.delete() MyTable.jobs.refresh() ``` -**Important**: Be careful when deleting reserved jobs—you may accidentally terminate jobs that are still running. Coordinate with your orchestration system to identify truly orphaned jobs. +**Note**: Deleting a reserved job does not terminate the running worker—it simply removes the reservation record. If the worker is still running, it will complete its `make()` call. If the job is then refreshed as pending and picked up by another worker, duplicated work may occur. Coordinate with your orchestration system to identify truly orphaned jobs before clearing them. ## Configuration Options @@ -666,7 +666,7 @@ This section identifies potential hazards and their mitigations. | Hazard | Description | Mitigation | |--------|-------------|------------| | **Accidental job deletion** | User runs `jobs.delete()` without restriction | `delete()` inherits from `delete_quick()` (no confirmation); users must apply restrictions carefully | -| **Clearing active jobs** | User clears reserved jobs while workers are running | Document warning in Orphaned Job Handling; recommend coordinating with orchestrator | +| **Clearing active jobs** | User clears reserved jobs while workers are still running | May cause duplicated work if job is refreshed and picked up again; coordinate with orchestrator | | **Priority confusion** | User expects higher number = higher priority | Document clearly: lower values are more urgent (0 = highest priority) | ### Migration From 61cc759520d20afdd27c476b8af7478615695cb9 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:12:33 +0000 Subject: [PATCH 19/39] Use relative delay (seconds) instead of absolute scheduled_time Change scheduling parameter from absolute datetime to relative seconds: - Rename scheduled_time to delay (float, seconds from now) - Uses database server time (NOW() + INTERVAL) to avoid clock sync issues - Update all examples to use delay parameter --- docs/src/design/autopopulate-2.0-spec.md | 32 +++++++++++------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 300c628cd..04e0f5ac4 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -149,7 +149,7 @@ class JobsTable(Table): def refresh( self, *restrictions, - scheduled_time: datetime = None, + delay: float = 0, priority: int = 5, stale_timeout: float = None ) -> dict: @@ -163,9 +163,9 @@ class JobsTable(Table): Args: restrictions: Conditions to filter key_source - scheduled_time: When new jobs should become available for processing. - Default: now (jobs are immediately available). - Use future times to schedule jobs for later processing. + delay: Seconds from now until jobs become available for processing. + Default: 0 (jobs are immediately available). + Uses database server time to avoid client clock synchronization issues. priority: Priority for new jobs (lower = more urgent). Default: 5 stale_timeout: Seconds after which pending jobs are checked for staleness. Jobs older than this are removed if their key is no longer @@ -301,11 +301,9 @@ MyTable.jobs.progress() # Returns detailed status breakdown ### Priority and Scheduling -Priority and scheduling are handled via `refresh()` parameters. Lower priority values are more urgent (0 = highest priority). +Priority and scheduling are handled via `refresh()` parameters. Lower priority values are more urgent (0 = highest priority). Scheduling uses relative time (seconds from now) based on database server time. ```python -from datetime import datetime, timedelta - # Add urgent jobs (priority=0 is most urgent) MyTable.jobs.refresh(priority=0) @@ -316,12 +314,13 @@ MyTable.jobs.refresh() MyTable.jobs.refresh(priority=10) # Schedule jobs for future processing (2 hours from now) -future_time = datetime.now() + timedelta(hours=2) -MyTable.jobs.refresh(scheduled_time=future_time) +MyTable.jobs.refresh(delay=2*60*60) # 7200 seconds + +# Schedule jobs for tomorrow (24 hours from now) +MyTable.jobs.refresh(delay=24*60*60) -# Combine: urgent jobs scheduled for tonight -tonight = datetime.now().replace(hour=22, minute=0, second=0) -MyTable.jobs.refresh(priority=0, scheduled_time=tonight) +# Combine: urgent jobs with 1-hour delay +MyTable.jobs.refresh(priority=0, delay=3600) # Add urgent jobs for specific subjects MyTable.jobs.refresh(Subject & 'priority="urgent"', priority=0) @@ -532,13 +531,10 @@ FilteredImage.populate(reserve_jobs=True) ### Scheduled Processing ```python -# Schedule jobs for overnight processing -from datetime import datetime, timedelta - -tonight = datetime.now().replace(hour=22, minute=0, second=0) -FilteredImage.jobs.refresh('subject_id > 100', scheduled_time=tonight) +# Schedule jobs for overnight processing (8 hours from now) +FilteredImage.jobs.refresh('subject_id > 100', delay=8*60*60) -# Only jobs scheduled for now or earlier will be processed +# Only jobs whose scheduled_time <= now will be processed FilteredImage.populate(reserve_jobs=True) ``` From 7b11d650e2f98a8cbc94d1d4a14f99b05d511df3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:21:59 +0000 Subject: [PATCH 20/39] Clarify that only make() errors are logged as error status Duplicate key errors from collisions occur outside make() and are handled silently - the job reverts to pending or (none) state. Only genuine computation failures inside make() are logged with error status. --- docs/src/design/autopopulate-2.0-spec.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/design/autopopulate-2.0-spec.md b/docs/src/design/autopopulate-2.0-spec.md index 04e0f5ac4..2e471cc5e 100644 --- a/docs/src/design/autopopulate-2.0-spec.md +++ b/docs/src/design/autopopulate-2.0-spec.md @@ -431,11 +431,13 @@ When two workers attempt to populate the same key: 1. Both call `make()` for the same key 2. First worker's `make()` transaction commits, inserting the result 3. Second worker's `make()` transaction fails with duplicate key error -4. Second worker catches the error and moves to the next job +4. Second worker catches the error, and the job returns to `pending` or `(none)` state + +**Important**: Only errors that occur *inside* `make()` are logged with `error` status. Duplicate key errors from collisions occur outside the `make()` logic and are handled silently—the job is either retried or reverts to `pending`/`(none)`. This distinction ensures the error log contains only genuine computation failures, not coordination artifacts. **Why this is acceptable**: - The `make()` transaction guarantees data integrity -- Duplicate key error is a clean, expected signal +- Duplicate key error is a clean, expected signal (not a real error) - With `reserve_jobs=True`, conflicts are rare (requires near-simultaneous reservation) - Wasted computation is minimal compared to locking complexity From 086de0749a4f9f49d55ae00657aa36919d58e6cc Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:30:47 +0000 Subject: [PATCH 21/39] Implement Autopopulate 2.0 job system This commit implements the per-table jobs system specified in the Autopopulate 2.0 design document. New features: - Per-table JobsTable class (jobs_v2.py) with FK-derived primary keys - Status enum: pending, reserved, success, error, ignore - Priority system (lower = more urgent, 0 = highest, default = 5) - Scheduled processing via delay parameter - Methods: refresh(), reserve(), complete(), error(), ignore() - Properties: pending, reserved, errors, ignored, completed, progress() Configuration (settings.py): - New JobsSettings class with: - jobs.auto_refresh (default: True) - jobs.keep_completed (default: False) - jobs.stale_timeout (default: 3600 seconds) - jobs.default_priority (default: 5) AutoPopulate changes (autopopulate.py): - Added jobs property to access per-table JobsTable - Updated populate() with new parameters: priority, refresh - Updated _populate1() to use new JobsTable API - Collision errors (DuplicateError) handled silently per spec Schema changes (schemas.py): - Track auto-populated tables during decoration - schema.jobs now returns list of JobsTable objects - Added schema.legacy_jobs for backward compatibility --- src/datajoint/autopopulate.py | 120 +++++-- src/datajoint/jobs_v2.py | 575 ++++++++++++++++++++++++++++++++++ src/datajoint/schemas.py | 26 +- src/datajoint/settings.py | 17 + 4 files changed, 711 insertions(+), 27 deletions(-) create mode 100644 src/datajoint/jobs_v2.py diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 677a8113c..84446840f 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -55,6 +55,7 @@ class AutoPopulate: _key_source = None _allow_insert = False + _jobs_table = None # Cached JobsTable instance @property def key_source(self): @@ -160,6 +161,21 @@ def target(self): """ return self + @property + def jobs(self): + """ + Access the jobs table for this auto-populated table. + + The jobs table provides per-table job queue management with rich status + tracking (pending, reserved, success, error, ignore). + + :return: JobsTable instance for this table + """ + if self._jobs_table is None: + from .jobs_v2 import JobsTable + self._jobs_table = JobsTable(self.target) + return self._jobs_table + def _job_key(self, key): """ :param key: they key returned for the job from the key source @@ -209,6 +225,9 @@ def populate( display_progress=False, processes=1, make_kwargs=None, + # New parameters for Autopopulate 2.0 + priority=None, + refresh=True, ): """ ``table.populate()`` calls ``table.make(key)`` for every primary key in @@ -230,6 +249,10 @@ def populate( to be passed down to each ``make()`` call. Computation arguments should be specified within the pipeline e.g. using a `dj.Lookup` table. :type make_kwargs: dict, optional + :param priority: Only process jobs at this priority or more urgent (lower values). + Only applies when reserve_jobs=True. + :param refresh: If True and no pending jobs are found, refresh the jobs queue + before giving up. Only applies when reserve_jobs=True. :return: a dict with two keys "success_count": the count of successful ``make()`` calls in this ``populate()`` call "error_list": the error list that is filled if `suppress_errors` is True @@ -240,7 +263,9 @@ def populate( valid_order = ["original", "reverse", "random"] if order not in valid_order: raise DataJointError("The order argument must be one of %s" % str(valid_order)) - jobs = self.connection.schemas[self.target.database].jobs if reserve_jobs else None + + # Get the jobs table (per-table JobsTable for new system) + jobs_table = self.jobs if reserve_jobs else None if reserve_jobs: # Define a signal handler for SIGTERM @@ -250,15 +275,21 @@ def handler(signum, frame): old_handler = signal.signal(signal.SIGTERM, handler) - if keys is None: - keys = (self._jobs_to_do(restrictions) - self.target).fetch("KEY", limit=limit) + error_list = [] + success_list = [] - # exclude "error", "ignore" or "reserved" jobs if reserve_jobs: - exclude_key_hashes = ( - jobs & {"table_name": self.target.table_name} & 'status in ("error", "ignore", "reserved")' - ).fetch("key_hash") - keys = [key for key in keys if key_hash(key) not in exclude_key_hashes] + # New Autopopulate 2.0 logic: use jobs table + keys = self._get_pending_jobs( + restrictions=restrictions, + priority=priority, + limit=limit, + refresh=refresh, + ) + else: + # Legacy behavior: get keys from key_source + if keys is None: + keys = (self._jobs_to_do(restrictions) - self.target).fetch("KEY", limit=limit) if order == "reverse": keys.reverse() @@ -270,9 +301,6 @@ def handler(signum, frame): keys = keys[:max_calls] nkeys = len(keys) - error_list = [] - success_list = [] - if nkeys: processes = min(_ for _ in (processes, nkeys, mp.cpu_count()) if _) @@ -284,7 +312,7 @@ def handler(signum, frame): if processes == 1: for key in tqdm(keys, desc=self.__class__.__name__) if display_progress else keys: - status = self._populate1(key, jobs, **populate_kwargs) + status = self._populate1(key, jobs_table, **populate_kwargs) if status is True: success_list.append(1) elif isinstance(status, tuple): @@ -296,7 +324,7 @@ def handler(signum, frame): self.connection.close() # disconnect parent process from MySQL server del self.connection._conn.ctx # SSLContext is not pickleable with ( - mp.Pool(processes, _initialize_populate, (self, jobs, populate_kwargs)) as pool, + mp.Pool(processes, _initialize_populate, (self, jobs_table, populate_kwargs)) as pool, tqdm(desc="Processes: ", total=nkeys) if display_progress else contextlib.nullcontext() as progress_bar, ): for status in pool.imap(_call_populate1, keys, chunksize=1): @@ -319,23 +347,54 @@ def handler(signum, frame): "error_list": error_list, } + def _get_pending_jobs(self, restrictions, priority, limit, refresh): + """ + Get pending jobs from the jobs table. + + If no pending jobs are found and refresh=True, refreshes the jobs queue + and tries again. + + :param restrictions: Restrictions to apply when refreshing + :param priority: Only get jobs at this priority or more urgent + :param limit: Maximum number of jobs to return + :param refresh: Whether to refresh if no pending jobs found + :return: List of key dicts + """ + jobs_table = self.jobs + + # First, try to get pending jobs + keys = jobs_table.fetch_pending(limit=limit, priority=priority) + + # If no pending jobs and refresh is enabled, refresh and try again + if not keys and refresh: + logger.debug("No pending jobs found, refreshing jobs queue") + jobs_table.refresh(*restrictions) + keys = jobs_table.fetch_pending(limit=limit, priority=priority) + + return keys + def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_kwargs=None): """ populates table for one source key, calling self.make inside a transaction. - :param jobs: the jobs table or None if not reserve_jobs + :param jobs: the jobs table (JobsTable) or None if not reserve_jobs :param key: dict specifying job to populate :param suppress_errors: bool if errors should be suppressed and returned :param return_exception_objects: if True, errors must be returned as objects :return: (key, error) when suppress_errors=True, True if successfully invoke one `make()` call, otherwise False """ + import time + # use the legacy `_make_tuples` callback. make = self._make_tuples if hasattr(self, "_make_tuples") else self.make + job_key = self._job_key(key) + start_time = time.time() - if jobs is not None and not jobs.reserve(self.target.table_name, self._job_key(key)): + # Try to reserve the job (per-key, before make) + if jobs is not None and not jobs.reserve(job_key): return False - # if make is a generator, it transaction can be delayed until the final stage + # if make is a generator, transaction can be delayed until the final stage is_generator = inspect.isgeneratorfunction(make) if not is_generator: self.connection.start_transaction() @@ -344,7 +403,8 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ if not is_generator: self.connection.cancel_transaction() if jobs is not None: - jobs.complete(self.target.table_name, self._job_key(key)) + # Job already done - mark complete or delete + jobs.complete(job_key, duration=0) return False logger.debug(f"Making {key} -> {self.target.full_table_name}") @@ -379,14 +439,23 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ msg=": " + str(error) if str(error) else "", ) logger.debug(f"Error making {key} -> {self.target.full_table_name} - {error_message}") + + # Only log errors from inside make() - not collision errors if jobs is not None: - # show error name and error message (if any) - jobs.error( - self.target.table_name, - self._job_key(key), - error_message=error_message, - error_stack=traceback.format_exc(), - ) + from .errors import DuplicateError + if isinstance(error, DuplicateError): + # Collision error - job reverts to pending or gets deleted + # This is not a real error, just coordination artifact + logger.debug(f"Duplicate key collision for {key}, reverting job") + # Delete the reservation, letting the job be picked up again or cleaned + (jobs & job_key).delete_quick() + else: + # Real error inside make() - log it + jobs.error( + job_key, + error_message=error_message, + error_stack=traceback.format_exc(), + ) if not suppress_errors or isinstance(error, SystemExit): raise else: @@ -394,9 +463,10 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ return key, error if return_exception_objects else error_message else: self.connection.commit_transaction() + duration = time.time() - start_time logger.debug(f"Success making {key} -> {self.target.full_table_name}") if jobs is not None: - jobs.complete(self.target.table_name, self._job_key(key)) + jobs.complete(job_key, duration=duration) return True finally: self.__class__._allow_insert = False diff --git a/src/datajoint/jobs_v2.py b/src/datajoint/jobs_v2.py new file mode 100644 index 000000000..ea5700b95 --- /dev/null +++ b/src/datajoint/jobs_v2.py @@ -0,0 +1,575 @@ +""" +Autopopulate 2.0 Jobs System + +This module implements per-table job tables for auto-populated tables. +Each dj.Imported or dj.Computed table gets its own hidden jobs table +with FK-derived primary keys and rich status tracking. +""" + +import logging +import os +import platform +from datetime import datetime +from typing import TYPE_CHECKING, Optional + +from .errors import DataJointError, DuplicateError +from .expression import QueryExpression +from .heading import Heading +from .settings import config +from .table import Table + +if TYPE_CHECKING: + from .autopopulate import AutoPopulate + +logger = logging.getLogger(__name__.split(".")[0]) + +ERROR_MESSAGE_LENGTH = 2047 +TRUNCATION_APPENDIX = "...truncated" + +# Default configuration values +DEFAULT_STALE_TIMEOUT = 3600 # 1 hour +DEFAULT_PRIORITY = 5 +DEFAULT_KEEP_COMPLETED = False + + +class JobsTable(Table): + """ + Per-table job queue for auto-populated tables. + + Each dj.Imported or dj.Computed table has an associated hidden jobs table + with the naming convention ~__jobs. + + The jobs table primary key includes only those attributes derived from + foreign keys in the target table's primary key. Additional primary key + attributes (if any) are excluded. + + Status values: + - pending: Job is queued and ready to be processed + - reserved: Job is currently being processed by a worker + - success: Job completed successfully + - error: Job failed with an error + - ignore: Job should be skipped (manually set) + """ + + def __init__(self, target: "AutoPopulate"): + """ + Initialize a JobsTable for the given auto-populated table. + + Args: + target: The auto-populated table (dj.Imported or dj.Computed) + """ + self._target = target + self._connection = target.connection + self.database = target.database + self._user = self.connection.get_user() + + # Derive the jobs table name from the target table + # e.g., __filtered_image -> _filtered_image__jobs + target_table_name = target.table_name + if target_table_name.startswith("__"): + # Computed table: __foo -> _foo__jobs + self._table_name = f"~{target_table_name[2:]}__jobs" + elif target_table_name.startswith("_"): + # Imported table: _foo -> _foo__jobs + self._table_name = f"~{target_table_name[1:]}__jobs" + else: + # Manual/Lookup (shouldn't happen for auto-populated) + self._table_name = f"~{target_table_name}__jobs" + + # Build the definition dynamically based on target's FK-derived primary key + self._definition = self._build_definition() + + # Initialize heading + self._heading = Heading( + table_info=dict( + conn=self._connection, + database=self.database, + table_name=self.table_name, + context=None, + ) + ) + self._support = [self.full_table_name] + + def _get_fk_derived_primary_key(self) -> list[tuple[str, str]]: + """ + Get the FK-derived primary key attributes from the target table. + + Returns: + List of (attribute_name, attribute_type) tuples for FK-derived PK attributes. + """ + # Get parent tables that contribute to the primary key + parents = self._target.parents(primary=True, as_objects=True, foreign_key_info=True) + + # Collect all FK-derived primary key attributes + fk_pk_attrs = set() + for parent_table, props in parents: + # attr_map maps child attr -> parent attr + for child_attr in props["attr_map"].keys(): + fk_pk_attrs.add(child_attr) + + # Get attribute definitions from target table's heading + pk_definitions = [] + for attr_name in self._target.primary_key: + if attr_name in fk_pk_attrs: + attr = self._target.heading.attributes[attr_name] + # Build attribute definition string + attr_def = f"{attr_name} : {attr.type}" + pk_definitions.append((attr_name, attr_def)) + + return pk_definitions + + def _build_definition(self) -> str: + """ + Build the table definition for the jobs table. + + Returns: + DataJoint table definition string. + """ + # Get FK-derived primary key attributes + pk_attrs = self._get_fk_derived_primary_key() + + if not pk_attrs: + raise DataJointError( + f"Cannot create jobs table for {self._target.full_table_name}: " + "no foreign-key-derived primary key attributes found." + ) + + # Build primary key section + pk_lines = [attr_def for _, attr_def in pk_attrs] + + definition = f"""# Job queue for {self._target.class_name} +{chr(10).join(pk_lines)} +--- +status : enum('pending', 'reserved', 'success', 'error', 'ignore') +priority : int # Lower = more urgent (0 = highest priority) +created_time : datetime(6) # When job was added to queue +scheduled_time : datetime(6) # Process on or after this time +reserved_time=null : datetime(6) # When job was reserved +completed_time=null : datetime(6) # When job completed +duration=null : float # Execution duration in seconds +error_message="" : varchar({ERROR_MESSAGE_LENGTH}) # Error message if failed +error_stack=null : mediumblob # Full error traceback +user="" : varchar(255) # Database user who reserved/completed job +host="" : varchar(255) # Hostname of worker +pid=0 : int unsigned # Process ID of worker +connection_id=0 : bigint unsigned # MySQL connection ID +version="" : varchar(255) # Code version +""" + return definition + + @property + def definition(self) -> str: + return self._definition + + @property + def table_name(self) -> str: + return self._table_name + + @property + def target(self) -> "AutoPopulate": + """The auto-populated table this jobs table is associated with.""" + return self._target + + def _ensure_declared(self) -> None: + """Ensure the jobs table is declared in the database.""" + if not self.is_declared: + self.declare() + + # --- Status filter properties --- + + @property + def pending(self) -> QueryExpression: + """Return query for pending jobs.""" + self._ensure_declared() + return self & 'status="pending"' + + @property + def reserved(self) -> QueryExpression: + """Return query for reserved jobs.""" + self._ensure_declared() + return self & 'status="reserved"' + + @property + def errors(self) -> QueryExpression: + """Return query for error jobs.""" + self._ensure_declared() + return self & 'status="error"' + + @property + def ignored(self) -> QueryExpression: + """Return query for ignored jobs.""" + self._ensure_declared() + return self & 'status="ignore"' + + @property + def completed(self) -> QueryExpression: + """Return query for completed (success) jobs.""" + self._ensure_declared() + return self & 'status="success"' + + # --- Core methods --- + + def delete(self) -> None: + """Delete jobs without confirmation (inherits from delete_quick).""" + self.delete_quick() + + def drop(self) -> None: + """Drop the jobs table without confirmation.""" + self.drop_quick() + + def refresh( + self, + *restrictions, + delay: float = 0, + priority: int = None, + stale_timeout: float = None, + ) -> dict: + """ + Refresh the jobs queue: add new jobs and remove stale ones. + + Operations performed: + 1. Add new jobs: (key_source & restrictions) - target - jobs → insert as 'pending' + 2. Remove stale jobs: pending jobs older than stale_timeout whose keys + are no longer in key_source + + Args: + restrictions: Conditions to filter key_source + delay: Seconds from now until jobs become available for processing. + Default: 0 (jobs are immediately available). + Uses database server time to avoid clock sync issues. + priority: Priority for new jobs (lower = more urgent). Default from config. + stale_timeout: Seconds after which pending jobs are checked for staleness. + Default from config. + + Returns: + {'added': int, 'removed': int} - counts of jobs added and stale jobs removed + """ + self._ensure_declared() + + if priority is None: + priority = config.jobs.default_priority + if stale_timeout is None: + stale_timeout = config.jobs.stale_timeout + + # Get FK-derived primary key attribute names + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + + # Step 1: Find new keys to add + # (key_source & restrictions) - target - jobs + key_source = self._target.key_source + if restrictions: + from .expression import AndList + key_source = key_source & AndList(restrictions) + + # Project to FK-derived attributes only + key_source_proj = key_source.proj(*pk_attrs) + target_proj = self._target.proj(*pk_attrs) + existing_jobs = self.proj(*pk_attrs) + + # Keys that need jobs: in key_source, not in target, not already in jobs + new_keys = (key_source_proj - target_proj - existing_jobs).fetch("KEY") + + # Insert new jobs + added = 0 + now = datetime.now() + for key in new_keys: + job = { + **key, + "status": "pending", + "priority": priority, + "created_time": now, + # Use SQL expression for scheduled_time to use server time + } + try: + # Use raw SQL to set scheduled_time using server time + self._insert_job_with_delay(key, priority, delay) + added += 1 + except DuplicateError: + # Job was added by another process + pass + + # Step 2: Remove stale pending jobs + # Find pending jobs older than stale_timeout whose keys are not in key_source + removed = 0 + if stale_timeout > 0: + stale_condition = ( + f'status="pending" AND ' + f'created_time < NOW() - INTERVAL {stale_timeout} SECOND' + ) + stale_jobs = (self & stale_condition).proj(*pk_attrs) + + # Check which stale jobs are no longer in key_source + orphaned_keys = (stale_jobs - key_source_proj).fetch("KEY") + for key in orphaned_keys: + (self & key).delete_quick() + removed += 1 + + return {"added": added, "removed": removed} + + def _insert_job_with_delay(self, key: dict, priority: int, delay: float) -> None: + """ + Insert a new job with scheduled_time set using database server time. + + Args: + key: Primary key dict for the job + priority: Job priority (lower = more urgent) + delay: Seconds from now until job becomes available + """ + # Build column names and values + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + columns = pk_attrs + [ + "status", "priority", "created_time", "scheduled_time", + "user", "host", "pid", "connection_id" + ] + + # Build values + pk_values = [f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) + for attr in pk_attrs] + other_values = [ + "'pending'", + str(priority), + "NOW(6)", # created_time + f"NOW(6) + INTERVAL {delay} SECOND" if delay > 0 else "NOW(6)", # scheduled_time + f"'{self._user}'", + f"'{platform.node()}'", + str(os.getpid()), + str(self.connection.connection_id), + ] + + sql = f""" + INSERT INTO {self.full_table_name} + ({', '.join(f'`{c}`' for c in columns)}) + VALUES ({', '.join(pk_values + other_values)}) + """ + self.connection.query(sql) + + def reserve(self, key: dict) -> bool: + """ + Attempt to reserve a job for processing. + + Updates status to 'reserved' if currently 'pending' and scheduled_time <= now. + + Args: + key: Primary key dict for the job + + Returns: + True if reservation successful, False if job not found or not pending. + """ + self._ensure_declared() + + # Build WHERE clause for the key + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + key_conditions = " AND ".join( + f"`{attr}`='{key[attr]}'" if isinstance(key[attr], str) + else f"`{attr}`={key[attr]}" + for attr in pk_attrs + ) + + # Attempt atomic update: pending -> reserved + sql = f""" + UPDATE {self.full_table_name} + SET status='reserved', + reserved_time=NOW(6), + user='{self._user}', + host='{platform.node()}', + pid={os.getpid()}, + connection_id={self.connection.connection_id} + WHERE {key_conditions} + AND status='pending' + AND scheduled_time <= NOW(6) + """ + result = self.connection.query(sql) + return result.rowcount > 0 + + def complete(self, key: dict, duration: float = None, keep: bool = None) -> None: + """ + Mark a job as successfully completed. + + Args: + key: Primary key dict for the job + duration: Execution duration in seconds + keep: If True, mark as 'success'. If False, delete the job entry. + Default from config (jobs.keep_completed). + """ + self._ensure_declared() + + if keep is None: + keep = config.jobs.keep_completed + + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + job_key = {attr: key[attr] for attr in pk_attrs if attr in key} + + if keep: + # Update to success status + duration_sql = f", duration={duration}" if duration is not None else "" + key_conditions = " AND ".join( + f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) + else f"`{attr}`={job_key[attr]}" + for attr in pk_attrs + ) + sql = f""" + UPDATE {self.full_table_name} + SET status='success', + completed_time=NOW(6){duration_sql} + WHERE {key_conditions} + """ + self.connection.query(sql) + else: + # Delete the job entry + (self & job_key).delete_quick() + + def error(self, key: dict, error_message: str, error_stack: str = None) -> None: + """ + Mark a job as failed with error details. + + Args: + key: Primary key dict for the job + error_message: Error message string + error_stack: Full stack trace + """ + self._ensure_declared() + + # Truncate error message if necessary + if len(error_message) > ERROR_MESSAGE_LENGTH: + error_message = ( + error_message[: ERROR_MESSAGE_LENGTH - len(TRUNCATION_APPENDIX)] + + TRUNCATION_APPENDIX + ) + + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + job_key = {attr: key[attr] for attr in pk_attrs if attr in key} + + key_conditions = " AND ".join( + f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) + else f"`{attr}`={job_key[attr]}" + for attr in pk_attrs + ) + + # Escape error message for SQL + error_message_escaped = error_message.replace("'", "''").replace("\\", "\\\\") + + sql = f""" + UPDATE {self.full_table_name} + SET status='error', + completed_time=NOW(6), + error_message='{error_message_escaped}' + WHERE {key_conditions} + """ + self.connection.query(sql) + + # Update error_stack separately using parameterized query if provided + if error_stack is not None: + with config.override(enable_python_native_blobs=True): + (self & job_key)._update("error_stack", error_stack) + + def ignore(self, key: dict) -> None: + """ + Mark a key to be ignored (skipped during populate). + + Can be called on keys not yet in the jobs table. + + Args: + key: Primary key dict for the job + """ + self._ensure_declared() + + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + job_key = {attr: key[attr] for attr in pk_attrs if attr in key} + + # Check if job already exists + if job_key in self: + # Update existing job to ignore + key_conditions = " AND ".join( + f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) + else f"`{attr}`={job_key[attr]}" + for attr in pk_attrs + ) + sql = f""" + UPDATE {self.full_table_name} + SET status='ignore' + WHERE {key_conditions} + """ + self.connection.query(sql) + else: + # Insert new job with ignore status + self._insert_job_with_status(job_key, "ignore") + + def _insert_job_with_status(self, key: dict, status: str) -> None: + """Insert a new job with the given status.""" + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + columns = pk_attrs + [ + "status", "priority", "created_time", "scheduled_time", + "user", "host", "pid", "connection_id" + ] + + pk_values = [ + f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) + for attr in pk_attrs + ] + other_values = [ + f"'{status}'", + str(DEFAULT_PRIORITY), + "NOW(6)", + "NOW(6)", + f"'{self._user}'", + f"'{platform.node()}'", + str(os.getpid()), + str(self.connection.connection_id), + ] + + sql = f""" + INSERT INTO {self.full_table_name} + ({', '.join(f'`{c}`' for c in columns)}) + VALUES ({', '.join(pk_values + other_values)}) + """ + self.connection.query(sql) + + def progress(self) -> dict: + """ + Report detailed progress of job processing. + + Returns: + Dict with counts for each status and total. + """ + self._ensure_declared() + + result = { + "pending": len(self.pending), + "reserved": len(self.reserved), + "success": len(self.completed), + "error": len(self.errors), + "ignore": len(self.ignored), + } + result["total"] = sum(result.values()) + return result + + def fetch_pending( + self, + limit: int = None, + priority: int = None, + ) -> list[dict]: + """ + Fetch pending jobs ordered by priority and scheduled time. + + Args: + limit: Maximum number of jobs to fetch + priority: Only fetch jobs at this priority or more urgent (lower values) + + Returns: + List of job key dicts + """ + self._ensure_declared() + + # Build query for non-stale pending jobs + query = self & 'status="pending" AND scheduled_time <= NOW(6)' + + if priority is not None: + query = query & f"priority <= {priority}" + + # Fetch with ordering + pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] + return query.fetch( + "KEY", + order_by=["priority ASC", "scheduled_time ASC"], + limit=limit, + ) diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index e9b83efff..b48c5310a 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -71,6 +71,7 @@ def __init__( self.create_schema = create_schema self.create_tables = create_tables self._jobs = None + self._auto_populated_tables = [] # Track auto-populated table classes self.external = ExternalMapping(self) self.add_objects = add_objects self.declare_list = [] @@ -227,6 +228,11 @@ def _decorate_table(self, table_class, context, assert_declared=False): else: instance.insert(contents, skip_duplicates=True) + # Track auto-populated tables for schema.jobs + if isinstance(instance, (Imported, Computed)) and not isinstance(instance, Part): + if table_class not in self._auto_populated_tables: + self._auto_populated_tables.append(table_class) + @property def log(self): self._assert_exists() @@ -338,9 +344,25 @@ def exists(self): @property def jobs(self): """ - schema.jobs provides a view of the job reservation table for the schema + Access job tables for all auto-populated tables in the schema. + + Returns a list of JobsTable objects, one for each Imported or Computed + table in the schema. + + :return: list of JobsTable objects + """ + self._assert_exists() + return [table_class().jobs for table_class in self._auto_populated_tables] + + @property + def legacy_jobs(self): + """ + Access the legacy schema-level job reservation table (~jobs). + + This is provided for backward compatibility and migration purposes. + New code should use per-table jobs via `MyTable.jobs` or `schema.jobs`. - :return: jobs table + :return: legacy JobTable """ self._assert_exists() if self._jobs is None: diff --git a/src/datajoint/settings.py b/src/datajoint/settings.py index 8e682691c..322aca099 100644 --- a/src/datajoint/settings.py +++ b/src/datajoint/settings.py @@ -188,6 +188,22 @@ class ExternalSettings(BaseSettings): aws_secret_access_key: SecretStr | None = Field(default=None, validation_alias="DJ_AWS_SECRET_ACCESS_KEY") +class JobsSettings(BaseSettings): + """Job queue settings for auto-populated tables.""" + + model_config = SettingsConfigDict( + env_prefix="DJ_JOBS_", + case_sensitive=False, + extra="forbid", + validate_assignment=True, + ) + + auto_refresh: bool = Field(default=True, description="Auto-refresh on populate") + keep_completed: bool = Field(default=False, description="Keep success records in jobs table") + stale_timeout: int = Field(default=3600, description="Seconds before pending job is considered stale") + default_priority: int = Field(default=5, description="Default priority for new jobs (lower = more urgent)") + + class ObjectStorageSettings(BaseSettings): """Object storage configuration for the object type.""" @@ -247,6 +263,7 @@ class Config(BaseSettings): connection: ConnectionSettings = Field(default_factory=ConnectionSettings) display: DisplaySettings = Field(default_factory=DisplaySettings) external: ExternalSettings = Field(default_factory=ExternalSettings) + jobs: JobsSettings = Field(default_factory=JobsSettings) object_storage: ObjectStorageSettings = Field(default_factory=ObjectStorageSettings) # Top-level settings From 53bd28de47f4a5ecf353a5e777f9e8fc8281e3ec Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:31:45 +0000 Subject: [PATCH 22/39] Drop jobs table when auto-populated table is dropped Override drop_quick() in Imported and Computed to also drop the associated jobs table when the main table is dropped. --- src/datajoint/user_tables.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/datajoint/user_tables.py b/src/datajoint/user_tables.py index d7faeb285..59065e7f1 100644 --- a/src/datajoint/user_tables.py +++ b/src/datajoint/user_tables.py @@ -152,6 +152,15 @@ class Imported(UserTable, AutoPopulate): _prefix = "_" tier_regexp = r"(?P" + _prefix + _base_regexp + ")" + def drop_quick(self): + """ + Drop the table and its associated jobs table. + """ + # Drop the jobs table first if it exists + if self._jobs_table is not None and self._jobs_table.is_declared: + self._jobs_table.drop_quick() + super().drop_quick() + class Computed(UserTable, AutoPopulate): """ @@ -162,6 +171,15 @@ class Computed(UserTable, AutoPopulate): _prefix = "__" tier_regexp = r"(?P" + _prefix + _base_regexp + ")" + def drop_quick(self): + """ + Drop the table and its associated jobs table. + """ + # Drop the jobs table first if it exists + if self._jobs_table is not None and self._jobs_table.is_declared: + self._jobs_table.drop_quick() + super().drop_quick() + class Part(UserTable): """ From 428c572591be6b5ea32903e4e1c1a3262242a6fb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:33:17 +0000 Subject: [PATCH 23/39] Add tests for Autopopulate 2.0 jobs system Comprehensive test suite for the new per-table jobs system: - JobsTable structure and initialization - refresh() method with priority and delay - reserve() method and reservation conflicts - complete() method with keep option - error() method and message truncation - ignore() method - Status filter properties (pending, reserved, errors, ignored, completed) - progress() method - populate() with reserve_jobs=True - schema.jobs property - Configuration settings --- tests/test_jobs_v2.py | 404 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 404 insertions(+) create mode 100644 tests/test_jobs_v2.py diff --git a/tests/test_jobs_v2.py b/tests/test_jobs_v2.py new file mode 100644 index 000000000..915b9a677 --- /dev/null +++ b/tests/test_jobs_v2.py @@ -0,0 +1,404 @@ +""" +Tests for the Autopopulate 2.0 per-table jobs system. +""" + +import time +import random +import string + +import datajoint as dj +from datajoint.jobs_v2 import JobsTable, ERROR_MESSAGE_LENGTH, TRUNCATION_APPENDIX + +from . import schema + + +class TestJobsTableStructure: + """Tests for JobsTable structure and initialization.""" + + def test_jobs_property_exists(self, schema_any): + """Test that Computed tables have a jobs property.""" + assert hasattr(schema.SigIntTable, 'jobs') + jobs = schema.SigIntTable().jobs + assert isinstance(jobs, JobsTable) + + def test_jobs_table_name(self, schema_any): + """Test that jobs table has correct naming convention.""" + jobs = schema.SigIntTable().jobs + # SigIntTable is __sig_int_table, jobs should be ~sig_int_table__jobs + assert jobs.table_name.startswith('~') + assert jobs.table_name.endswith('__jobs') + + def test_jobs_table_primary_key(self, schema_any): + """Test that jobs table has FK-derived primary key.""" + jobs = schema.SigIntTable().jobs + # SigIntTable depends on SimpleSource with pk 'id' + assert 'id' in jobs.primary_key + + def test_jobs_table_status_column(self, schema_any): + """Test that jobs table has status column with correct enum values.""" + jobs = schema.SigIntTable().jobs + jobs._ensure_declared() + status_attr = jobs.heading.attributes['status'] + assert 'pending' in status_attr.type + assert 'reserved' in status_attr.type + assert 'success' in status_attr.type + assert 'error' in status_attr.type + assert 'ignore' in status_attr.type + + +class TestJobsRefresh: + """Tests for JobsTable.refresh() method.""" + + def test_refresh_adds_jobs(self, schema_any): + """Test that refresh() adds pending jobs for keys in key_source.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() # Clear any existing jobs + + result = jobs.refresh() + assert result['added'] > 0 + assert len(jobs.pending) > 0 + + def test_refresh_with_priority(self, schema_any): + """Test that refresh() sets priority on new jobs.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + + jobs.refresh(priority=3) + priorities = jobs.pending.fetch('priority') + assert all(p == 3 for p in priorities) + + def test_refresh_with_delay(self, schema_any): + """Test that refresh() sets scheduled_time in the future.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + + jobs.refresh(delay=3600) # 1 hour delay + # Jobs should not be available for processing yet + keys = jobs.fetch_pending() + assert len(keys) == 0 # All jobs are scheduled for later + + def test_refresh_removes_stale_jobs(self, schema_any): + """Test that refresh() removes jobs for deleted upstream records.""" + # This test requires manipulating upstream data + pass # Skip for now + + +class TestJobsReserve: + """Tests for JobsTable.reserve() method.""" + + def test_reserve_pending_job(self, schema_any): + """Test that reserve() transitions pending -> reserved.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + # Get first pending job + key = jobs.pending.fetch('KEY', limit=1)[0] + assert jobs.reserve(key) + + # Verify status changed + status = (jobs & key).fetch1('status') + assert status == 'reserved' + + def test_reserve_already_reserved(self, schema_any): + """Test that reserve() returns False for already reserved job.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + assert jobs.reserve(key) + assert not jobs.reserve(key) # Second reserve should fail + + def test_reserve_scheduled_future(self, schema_any): + """Test that reserve() fails for jobs scheduled in the future.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh(delay=3600) # 1 hour delay + + key = jobs.fetch('KEY', limit=1)[0] + assert not jobs.reserve(key) # Should fail - not yet scheduled + + +class TestJobsComplete: + """Tests for JobsTable.complete() method.""" + + def test_complete_with_keep_false(self, schema_any): + """Test that complete() deletes job when keep=False.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + jobs.complete(key, duration=1.5, keep=False) + + assert key not in jobs + + def test_complete_with_keep_true(self, schema_any): + """Test that complete() marks job as success when keep=True.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + jobs.complete(key, duration=1.5, keep=True) + + status = (jobs & key).fetch1('status') + assert status == 'success' + + +class TestJobsError: + """Tests for JobsTable.error() method.""" + + def test_error_marks_status(self, schema_any): + """Test that error() marks job as error with message.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + jobs.error(key, error_message="Test error", error_stack="stack trace") + + status, msg = (jobs & key).fetch1('status', 'error_message') + assert status == 'error' + assert msg == "Test error" + + def test_error_truncates_long_message(self, schema_any): + """Test that error() truncates long error messages.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + long_message = ''.join(random.choice(string.ascii_letters) + for _ in range(ERROR_MESSAGE_LENGTH + 100)) + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + jobs.error(key, error_message=long_message) + + msg = (jobs & key).fetch1('error_message') + assert len(msg) == ERROR_MESSAGE_LENGTH + assert msg.endswith(TRUNCATION_APPENDIX) + + +class TestJobsIgnore: + """Tests for JobsTable.ignore() method.""" + + def test_ignore_marks_status(self, schema_any): + """Test that ignore() marks job as ignore.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.ignore(key) + + status = (jobs & key).fetch1('status') + assert status == 'ignore' + + def test_ignore_new_key(self, schema_any): + """Test that ignore() can create new job with ignore status.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + + # Don't refresh - ignore a key directly + key = {'id': 1} + jobs.ignore(key) + + status = (jobs & key).fetch1('status') + assert status == 'ignore' + + +class TestJobsStatusProperties: + """Tests for status filter properties.""" + + def test_pending_property(self, schema_any): + """Test that pending property returns pending jobs.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + assert len(jobs.pending) > 0 + statuses = jobs.pending.fetch('status') + assert all(s == 'pending' for s in statuses) + + def test_reserved_property(self, schema_any): + """Test that reserved property returns reserved jobs.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + + assert len(jobs.reserved) == 1 + statuses = jobs.reserved.fetch('status') + assert all(s == 'reserved' for s in statuses) + + def test_errors_property(self, schema_any): + """Test that errors property returns error jobs.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + jobs.error(key, error_message="test") + + assert len(jobs.errors) == 1 + + def test_ignored_property(self, schema_any): + """Test that ignored property returns ignored jobs.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.ignore(key) + + assert len(jobs.ignored) == 1 + + +class TestJobsProgress: + """Tests for JobsTable.progress() method.""" + + def test_progress_returns_counts(self, schema_any): + """Test that progress() returns status counts.""" + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + progress = jobs.progress() + + assert 'pending' in progress + assert 'reserved' in progress + assert 'success' in progress + assert 'error' in progress + assert 'ignore' in progress + assert 'total' in progress + assert progress['total'] == sum( + progress[k] for k in ['pending', 'reserved', 'success', 'error', 'ignore'] + ) + + +class TestPopulateWithJobs: + """Tests for populate() with reserve_jobs=True using new system.""" + + def test_populate_creates_jobs_table(self, schema_any): + """Test that populate with reserve_jobs creates jobs table.""" + table = schema.SigIntTable() + # Clear target table to allow re-population + table.delete() + + # First populate should create jobs table + table.populate(reserve_jobs=True, suppress_errors=True, max_calls=1) + + assert table.jobs.is_declared + + def test_populate_uses_jobs_queue(self, schema_any): + """Test that populate processes jobs from queue.""" + table = schema.Experiment() + table.delete() + jobs = table.jobs + jobs.delete() + + # Refresh to add jobs + jobs.refresh() + initial_pending = len(jobs.pending) + assert initial_pending > 0 + + # Populate one job + result = table.populate(reserve_jobs=True, max_calls=1) + assert result['success_count'] >= 0 # May be 0 if error + + def test_populate_with_priority_filter(self, schema_any): + """Test that populate respects priority filter.""" + table = schema.Experiment() + table.delete() + jobs = table.jobs + jobs.delete() + + # Add jobs with different priorities + # This would require the table to have multiple keys + pass # Skip for now + + +class TestSchemaJobs: + """Tests for schema.jobs property.""" + + def test_schema_jobs_returns_list(self, schema_any): + """Test that schema.jobs returns list of JobsTable objects.""" + jobs_list = schema_any.jobs + assert isinstance(jobs_list, list) + + def test_schema_jobs_contains_jobs_tables(self, schema_any): + """Test that schema.jobs contains JobsTable instances.""" + jobs_list = schema_any.jobs + for jobs in jobs_list: + assert isinstance(jobs, JobsTable) + + +class TestTableDropLifecycle: + """Tests for table drop lifecycle.""" + + def test_drop_removes_jobs_table(self, schema_any): + """Test that dropping a table also drops its jobs table.""" + # Create a temporary computed table for this test + # This test would modify the schema, so skip for now + pass + + +class TestConfiguration: + """Tests for jobs configuration settings.""" + + def test_default_priority_config(self, schema_any): + """Test that config.jobs.default_priority is used.""" + original = dj.config.jobs.default_priority + try: + dj.config.jobs.default_priority = 3 + + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() # Should use default priority from config + + priorities = jobs.pending.fetch('priority') + assert all(p == 3 for p in priorities) + finally: + dj.config.jobs.default_priority = original + + def test_keep_completed_config(self, schema_any): + """Test that config.jobs.keep_completed affects complete().""" + # Test with keep_completed=True + with dj.config.override(jobs__keep_completed=True): + table = schema.SigIntTable() + jobs = table.jobs + jobs.delete() + jobs.refresh() + + key = jobs.pending.fetch('KEY', limit=1)[0] + jobs.reserve(key) + jobs.complete(key) # Should use config + + status = (jobs & key).fetch1('status') + assert status == 'success' From e89e064a45d2962d559f7e5f9dd5faaadc9240fb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:38:17 +0000 Subject: [PATCH 24/39] Fix ruff linting errors and reformat - Remove unused `job` dict and `now` variable in refresh() - Remove unused `pk_attrs` in fetch_pending() - Remove unused datetime import - Apply ruff-format formatting changes --- src/datajoint/autopopulate.py | 3 +- src/datajoint/jobs_v2.py | 55 ++++------------- tests/test_jobs_v2.py | 108 ++++++++++++++++------------------ 3 files changed, 66 insertions(+), 100 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 84446840f..b964e51d4 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -14,7 +14,6 @@ from .errors import DataJointError, LostConnectionError from .expression import AndList, QueryExpression -from .hash import key_hash # noinspection PyExceptionInherit,PyCallingNonCallable @@ -173,6 +172,7 @@ def jobs(self): """ if self._jobs_table is None: from .jobs_v2 import JobsTable + self._jobs_table = JobsTable(self.target) return self._jobs_table @@ -443,6 +443,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ # Only log errors from inside make() - not collision errors if jobs is not None: from .errors import DuplicateError + if isinstance(error, DuplicateError): # Collision error - job reverts to pending or gets deleted # This is not a real error, just coordination artifact diff --git a/src/datajoint/jobs_v2.py b/src/datajoint/jobs_v2.py index ea5700b95..9bccd3e40 100644 --- a/src/datajoint/jobs_v2.py +++ b/src/datajoint/jobs_v2.py @@ -9,8 +9,7 @@ import logging import os import platform -from datetime import datetime -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from .errors import DataJointError, DuplicateError from .expression import QueryExpression @@ -259,6 +258,7 @@ def refresh( key_source = self._target.key_source if restrictions: from .expression import AndList + key_source = key_source & AndList(restrictions) # Project to FK-derived attributes only @@ -271,17 +271,8 @@ def refresh( # Insert new jobs added = 0 - now = datetime.now() for key in new_keys: - job = { - **key, - "status": "pending", - "priority": priority, - "created_time": now, - # Use SQL expression for scheduled_time to use server time - } try: - # Use raw SQL to set scheduled_time using server time self._insert_job_with_delay(key, priority, delay) added += 1 except DuplicateError: @@ -292,10 +283,7 @@ def refresh( # Find pending jobs older than stale_timeout whose keys are not in key_source removed = 0 if stale_timeout > 0: - stale_condition = ( - f'status="pending" AND ' - f'created_time < NOW() - INTERVAL {stale_timeout} SECOND' - ) + stale_condition = f'status="pending" AND ' f"created_time < NOW() - INTERVAL {stale_timeout} SECOND" stale_jobs = (self & stale_condition).proj(*pk_attrs) # Check which stale jobs are no longer in key_source @@ -317,14 +305,10 @@ def _insert_job_with_delay(self, key: dict, priority: int, delay: float) -> None """ # Build column names and values pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] - columns = pk_attrs + [ - "status", "priority", "created_time", "scheduled_time", - "user", "host", "pid", "connection_id" - ] + columns = pk_attrs + ["status", "priority", "created_time", "scheduled_time", "user", "host", "pid", "connection_id"] # Build values - pk_values = [f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) - for attr in pk_attrs] + pk_values = [f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) for attr in pk_attrs] other_values = [ "'pending'", str(priority), @@ -360,9 +344,7 @@ def reserve(self, key: dict) -> bool: # Build WHERE clause for the key pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] key_conditions = " AND ".join( - f"`{attr}`='{key[attr]}'" if isinstance(key[attr], str) - else f"`{attr}`={key[attr]}" - for attr in pk_attrs + f"`{attr}`='{key[attr]}'" if isinstance(key[attr], str) else f"`{attr}`={key[attr]}" for attr in pk_attrs ) # Attempt atomic update: pending -> reserved @@ -403,8 +385,7 @@ def complete(self, key: dict, duration: float = None, keep: bool = None) -> None # Update to success status duration_sql = f", duration={duration}" if duration is not None else "" key_conditions = " AND ".join( - f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) - else f"`{attr}`={job_key[attr]}" + f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) else f"`{attr}`={job_key[attr]}" for attr in pk_attrs ) sql = f""" @@ -431,17 +412,13 @@ def error(self, key: dict, error_message: str, error_stack: str = None) -> None: # Truncate error message if necessary if len(error_message) > ERROR_MESSAGE_LENGTH: - error_message = ( - error_message[: ERROR_MESSAGE_LENGTH - len(TRUNCATION_APPENDIX)] - + TRUNCATION_APPENDIX - ) + error_message = error_message[: ERROR_MESSAGE_LENGTH - len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] job_key = {attr: key[attr] for attr in pk_attrs if attr in key} key_conditions = " AND ".join( - f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) - else f"`{attr}`={job_key[attr]}" + f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) else f"`{attr}`={job_key[attr]}" for attr in pk_attrs ) @@ -480,8 +457,7 @@ def ignore(self, key: dict) -> None: if job_key in self: # Update existing job to ignore key_conditions = " AND ".join( - f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) - else f"`{attr}`={job_key[attr]}" + f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) else f"`{attr}`={job_key[attr]}" for attr in pk_attrs ) sql = f""" @@ -497,15 +473,9 @@ def ignore(self, key: dict) -> None: def _insert_job_with_status(self, key: dict, status: str) -> None: """Insert a new job with the given status.""" pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] - columns = pk_attrs + [ - "status", "priority", "created_time", "scheduled_time", - "user", "host", "pid", "connection_id" - ] + columns = pk_attrs + ["status", "priority", "created_time", "scheduled_time", "user", "host", "pid", "connection_id"] - pk_values = [ - f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) - for attr in pk_attrs - ] + pk_values = [f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) for attr in pk_attrs] other_values = [ f"'{status}'", str(DEFAULT_PRIORITY), @@ -567,7 +537,6 @@ def fetch_pending( query = query & f"priority <= {priority}" # Fetch with ordering - pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] return query.fetch( "KEY", order_by=["priority ASC", "scheduled_time ASC"], diff --git a/tests/test_jobs_v2.py b/tests/test_jobs_v2.py index 915b9a677..1c4f2acc1 100644 --- a/tests/test_jobs_v2.py +++ b/tests/test_jobs_v2.py @@ -2,7 +2,6 @@ Tests for the Autopopulate 2.0 per-table jobs system. """ -import time import random import string @@ -17,7 +16,7 @@ class TestJobsTableStructure: def test_jobs_property_exists(self, schema_any): """Test that Computed tables have a jobs property.""" - assert hasattr(schema.SigIntTable, 'jobs') + assert hasattr(schema.SigIntTable, "jobs") jobs = schema.SigIntTable().jobs assert isinstance(jobs, JobsTable) @@ -25,25 +24,25 @@ def test_jobs_table_name(self, schema_any): """Test that jobs table has correct naming convention.""" jobs = schema.SigIntTable().jobs # SigIntTable is __sig_int_table, jobs should be ~sig_int_table__jobs - assert jobs.table_name.startswith('~') - assert jobs.table_name.endswith('__jobs') + assert jobs.table_name.startswith("~") + assert jobs.table_name.endswith("__jobs") def test_jobs_table_primary_key(self, schema_any): """Test that jobs table has FK-derived primary key.""" jobs = schema.SigIntTable().jobs # SigIntTable depends on SimpleSource with pk 'id' - assert 'id' in jobs.primary_key + assert "id" in jobs.primary_key def test_jobs_table_status_column(self, schema_any): """Test that jobs table has status column with correct enum values.""" jobs = schema.SigIntTable().jobs jobs._ensure_declared() - status_attr = jobs.heading.attributes['status'] - assert 'pending' in status_attr.type - assert 'reserved' in status_attr.type - assert 'success' in status_attr.type - assert 'error' in status_attr.type - assert 'ignore' in status_attr.type + status_attr = jobs.heading.attributes["status"] + assert "pending" in status_attr.type + assert "reserved" in status_attr.type + assert "success" in status_attr.type + assert "error" in status_attr.type + assert "ignore" in status_attr.type class TestJobsRefresh: @@ -56,7 +55,7 @@ def test_refresh_adds_jobs(self, schema_any): jobs.delete() # Clear any existing jobs result = jobs.refresh() - assert result['added'] > 0 + assert result["added"] > 0 assert len(jobs.pending) > 0 def test_refresh_with_priority(self, schema_any): @@ -66,7 +65,7 @@ def test_refresh_with_priority(self, schema_any): jobs.delete() jobs.refresh(priority=3) - priorities = jobs.pending.fetch('priority') + priorities = jobs.pending.fetch("priority") assert all(p == 3 for p in priorities) def test_refresh_with_delay(self, schema_any): @@ -97,12 +96,12 @@ def test_reserve_pending_job(self, schema_any): jobs.refresh() # Get first pending job - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] assert jobs.reserve(key) # Verify status changed - status = (jobs & key).fetch1('status') - assert status == 'reserved' + status = (jobs & key).fetch1("status") + assert status == "reserved" def test_reserve_already_reserved(self, schema_any): """Test that reserve() returns False for already reserved job.""" @@ -111,7 +110,7 @@ def test_reserve_already_reserved(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] assert jobs.reserve(key) assert not jobs.reserve(key) # Second reserve should fail @@ -122,7 +121,7 @@ def test_reserve_scheduled_future(self, schema_any): jobs.delete() jobs.refresh(delay=3600) # 1 hour delay - key = jobs.fetch('KEY', limit=1)[0] + key = jobs.fetch("KEY", limit=1)[0] assert not jobs.reserve(key) # Should fail - not yet scheduled @@ -136,7 +135,7 @@ def test_complete_with_keep_false(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) jobs.complete(key, duration=1.5, keep=False) @@ -149,12 +148,12 @@ def test_complete_with_keep_true(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) jobs.complete(key, duration=1.5, keep=True) - status = (jobs & key).fetch1('status') - assert status == 'success' + status = (jobs & key).fetch1("status") + assert status == "success" class TestJobsError: @@ -167,12 +166,12 @@ def test_error_marks_status(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) jobs.error(key, error_message="Test error", error_stack="stack trace") - status, msg = (jobs & key).fetch1('status', 'error_message') - assert status == 'error' + status, msg = (jobs & key).fetch1("status", "error_message") + assert status == "error" assert msg == "Test error" def test_error_truncates_long_message(self, schema_any): @@ -182,14 +181,13 @@ def test_error_truncates_long_message(self, schema_any): jobs.delete() jobs.refresh() - long_message = ''.join(random.choice(string.ascii_letters) - for _ in range(ERROR_MESSAGE_LENGTH + 100)) + long_message = "".join(random.choice(string.ascii_letters) for _ in range(ERROR_MESSAGE_LENGTH + 100)) - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) jobs.error(key, error_message=long_message) - msg = (jobs & key).fetch1('error_message') + msg = (jobs & key).fetch1("error_message") assert len(msg) == ERROR_MESSAGE_LENGTH assert msg.endswith(TRUNCATION_APPENDIX) @@ -204,11 +202,11 @@ def test_ignore_marks_status(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.ignore(key) - status = (jobs & key).fetch1('status') - assert status == 'ignore' + status = (jobs & key).fetch1("status") + assert status == "ignore" def test_ignore_new_key(self, schema_any): """Test that ignore() can create new job with ignore status.""" @@ -217,11 +215,11 @@ def test_ignore_new_key(self, schema_any): jobs.delete() # Don't refresh - ignore a key directly - key = {'id': 1} + key = {"id": 1} jobs.ignore(key) - status = (jobs & key).fetch1('status') - assert status == 'ignore' + status = (jobs & key).fetch1("status") + assert status == "ignore" class TestJobsStatusProperties: @@ -235,8 +233,8 @@ def test_pending_property(self, schema_any): jobs.refresh() assert len(jobs.pending) > 0 - statuses = jobs.pending.fetch('status') - assert all(s == 'pending' for s in statuses) + statuses = jobs.pending.fetch("status") + assert all(s == "pending" for s in statuses) def test_reserved_property(self, schema_any): """Test that reserved property returns reserved jobs.""" @@ -245,12 +243,12 @@ def test_reserved_property(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) assert len(jobs.reserved) == 1 - statuses = jobs.reserved.fetch('status') - assert all(s == 'reserved' for s in statuses) + statuses = jobs.reserved.fetch("status") + assert all(s == "reserved" for s in statuses) def test_errors_property(self, schema_any): """Test that errors property returns error jobs.""" @@ -259,7 +257,7 @@ def test_errors_property(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) jobs.error(key, error_message="test") @@ -272,7 +270,7 @@ def test_ignored_property(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.ignore(key) assert len(jobs.ignored) == 1 @@ -290,15 +288,13 @@ def test_progress_returns_counts(self, schema_any): progress = jobs.progress() - assert 'pending' in progress - assert 'reserved' in progress - assert 'success' in progress - assert 'error' in progress - assert 'ignore' in progress - assert 'total' in progress - assert progress['total'] == sum( - progress[k] for k in ['pending', 'reserved', 'success', 'error', 'ignore'] - ) + assert "pending" in progress + assert "reserved" in progress + assert "success" in progress + assert "error" in progress + assert "ignore" in progress + assert "total" in progress + assert progress["total"] == sum(progress[k] for k in ["pending", "reserved", "success", "error", "ignore"]) class TestPopulateWithJobs: @@ -329,7 +325,7 @@ def test_populate_uses_jobs_queue(self, schema_any): # Populate one job result = table.populate(reserve_jobs=True, max_calls=1) - assert result['success_count'] >= 0 # May be 0 if error + assert result["success_count"] >= 0 # May be 0 if error def test_populate_with_priority_filter(self, schema_any): """Test that populate respects priority filter.""" @@ -382,7 +378,7 @@ def test_default_priority_config(self, schema_any): jobs.delete() jobs.refresh() # Should use default priority from config - priorities = jobs.pending.fetch('priority') + priorities = jobs.pending.fetch("priority") assert all(p == 3 for p in priorities) finally: dj.config.jobs.default_priority = original @@ -396,9 +392,9 @@ def test_keep_completed_config(self, schema_any): jobs.delete() jobs.refresh() - key = jobs.pending.fetch('KEY', limit=1)[0] + key = jobs.pending.fetch("KEY", limit=1)[0] jobs.reserve(key) jobs.complete(key) # Should use config - status = (jobs & key).fetch1('status') - assert status == 'success' + status = (jobs & key).fetch1("status") + assert status == "success" From 0f98b180474fc91417c682f78702bff871c1713e Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:45:23 +0000 Subject: [PATCH 25/39] Remove legacy schema-wide jobs system Replace schema-wide `~jobs` table with per-table JobsTable (Autopopulate 2.0): - Delete src/datajoint/jobs.py (old JobTable class) - Remove legacy_jobs property from Schema class - Delete tests/test_jobs.py (old schema-wide tests) - Remove clean_jobs fixture and schema.jobs.delete() cleanup calls - Update test_autopopulate.py to use new per-table jobs API The new system provides per-table job queues with FK-derived primary keys, rich status tracking (pending/reserved/success/error/ignore), priority scheduling, and proper handling of job collisions. --- src/datajoint/jobs.py | 154 ------------------------------------- src/datajoint/schemas.py | 17 ---- tests/conftest.py | 32 -------- tests/test_autopopulate.py | 11 ++- tests/test_jobs.py | 130 ------------------------------- 5 files changed, 8 insertions(+), 336 deletions(-) delete mode 100644 src/datajoint/jobs.py delete mode 100644 tests/test_jobs.py diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py deleted file mode 100644 index ff6440495..000000000 --- a/src/datajoint/jobs.py +++ /dev/null @@ -1,154 +0,0 @@ -import os -import platform - -from .errors import DuplicateError -from .hash import key_hash -from .heading import Heading -from .settings import config -from .table import Table - -ERROR_MESSAGE_LENGTH = 2047 -TRUNCATION_APPENDIX = "...truncated" - - -class JobTable(Table): - """ - A base table with no definition. Allows reserving jobs - """ - - def __init__(self, conn, database): - self.database = database - self._connection = conn - self._heading = Heading(table_info=dict(conn=conn, database=database, table_name=self.table_name, context=None)) - self._support = [self.full_table_name] - - self._definition = """ # job reservation table for `{database}` - table_name :varchar(255) # className of the table - key_hash :char(32) # key hash - --- - status :enum('reserved','error','ignore') # if tuple is missing, the job is available - key=null :blob # structure containing the key - error_message="" :varchar({error_message_length}) # error message returned if failed - error_stack=null :mediumblob # error stack if failed - user="" :varchar(255) # database user - host="" :varchar(255) # system hostname - pid=0 :int unsigned # system process id - connection_id = 0 : bigint unsigned # connection_id() - timestamp=CURRENT_TIMESTAMP :timestamp # automatic timestamp - """.format(database=database, error_message_length=ERROR_MESSAGE_LENGTH) - if not self.is_declared: - self.declare() - self._user = self.connection.get_user() - - @property - def definition(self): - return self._definition - - @property - def table_name(self): - return "~jobs" - - def delete(self): - """bypass interactive prompts and dependencies""" - self.delete_quick() - - def drop(self): - """bypass interactive prompts and dependencies""" - self.drop_quick() - - def reserve(self, table_name, key): - """ - Reserve a job for computation. When a job is reserved, the job table contains an entry for the - job key, identified by its hash. When jobs are completed, the entry is removed. - - :param table_name: `database`.`table_name` - :param key: the dict of the job's primary key - :return: True if reserved job successfully. False = the jobs is already taken - """ - job = dict( - table_name=table_name, - key_hash=key_hash(key), - status="reserved", - host=platform.node(), - pid=os.getpid(), - connection_id=self.connection.connection_id, - key=key, - user=self._user, - ) - try: - with config.override(enable_python_native_blobs=True): - self.insert1(job, ignore_extra_fields=True) - except DuplicateError: - return False - return True - - def ignore(self, table_name, key): - """ - Set a job to be ignored for computation. When a job is ignored, the job table contains an entry for the - job key, identified by its hash, with status "ignore". - - Args: - table_name: - Table name (str) - `database`.`table_name` - key: - The dict of the job's primary key - - Returns: - True if ignore job successfully. False = the jobs is already taken - """ - job = dict( - table_name=table_name, - key_hash=key_hash(key), - status="ignore", - host=platform.node(), - pid=os.getpid(), - connection_id=self.connection.connection_id, - key=key, - user=self._user, - ) - try: - with config.override(enable_python_native_blobs=True): - self.insert1(job, ignore_extra_fields=True) - except DuplicateError: - return False - return True - - def complete(self, table_name, key): - """ - Log a completed job. When a job is completed, its reservation entry is deleted. - - :param table_name: `database`.`table_name` - :param key: the dict of the job's primary key - """ - job_key = dict(table_name=table_name, key_hash=key_hash(key)) - (self & job_key).delete_quick() - - def error(self, table_name, key, error_message, error_stack=None): - """ - Log an error message. The job reservation is replaced with an error entry. - if an error occurs, leave an entry describing the problem - - :param table_name: `database`.`table_name` - :param key: the dict of the job's primary key - :param error_message: string error message - :param error_stack: stack trace - """ - if len(error_message) > ERROR_MESSAGE_LENGTH: - error_message = error_message[: ERROR_MESSAGE_LENGTH - len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX - with config.override(enable_python_native_blobs=True): - self.insert1( - dict( - table_name=table_name, - key_hash=key_hash(key), - status="error", - host=platform.node(), - pid=os.getpid(), - connection_id=self.connection.connection_id, - user=self._user, - key=key, - error_message=error_message, - error_stack=error_stack, - ), - replace=True, - ignore_extra_fields=True, - ) diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index b48c5310a..9df3ba34d 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -10,7 +10,6 @@ from .errors import AccessError, DataJointError from .external import ExternalMapping from .heading import Heading -from .jobs import JobTable from .settings import config from .table import FreeTable, Log, lookup_class_name from .user_tables import Computed, Imported, Lookup, Manual, Part, _get_tier @@ -70,7 +69,6 @@ def __init__( self.context = context self.create_schema = create_schema self.create_tables = create_tables - self._jobs = None self._auto_populated_tables = [] # Track auto-populated table classes self.external = ExternalMapping(self) self.add_objects = add_objects @@ -354,21 +352,6 @@ def jobs(self): self._assert_exists() return [table_class().jobs for table_class in self._auto_populated_tables] - @property - def legacy_jobs(self): - """ - Access the legacy schema-level job reservation table (~jobs). - - This is provided for backward compatibility and migration purposes. - New code should use per-table jobs via `MyTable.jobs` or `schema.jobs`. - - :return: legacy JobTable - """ - self._assert_exists() - if self._jobs is None: - self._jobs = JobTable(self.connection, self.database) - return self._jobs - @property def code(self): self._assert_exists() diff --git a/tests/conftest.py b/tests/conftest.py index d90bfc867..23222f43a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,6 @@ import datajoint as dj from datajoint.errors import ( FILEPATH_FEATURE_SWITCH, - DataJointError, ) from . import schema, schema_advanced, schema_external, schema_object, schema_simple @@ -55,21 +54,6 @@ def clean_autopopulate(experiment, trial, ephys): experiment.delete() -@pytest.fixture -def clean_jobs(schema_any): - """ - Explicit cleanup fixture for jobs tests. - - Cleans jobs table before test runs. - Tests must explicitly request this fixture to get cleanup. - """ - try: - schema_any.jobs.delete() - except DataJointError: - pass - yield - - @pytest.fixture def clean_test_tables(test, test_extra, test_no_extra): """ @@ -569,10 +553,6 @@ def mock_cache(tmpdir_factory): def schema_any(connection_test, prefix): schema_any = dj.Schema(prefix + "_test1", schema.LOCALS_ANY, connection=connection_test) assert schema.LOCALS_ANY, "LOCALS_ANY is empty" - try: - schema_any.jobs.delete() - except DataJointError: - pass schema_any(schema.TTest) schema_any(schema.TTest2) schema_any(schema.TTest3) @@ -612,10 +592,6 @@ def schema_any(connection_test, prefix): schema_any(schema.Stimulus) schema_any(schema.Longblob) yield schema_any - try: - schema_any.jobs.delete() - except DataJointError: - pass schema_any.drop() @@ -624,10 +600,6 @@ def schema_any_fresh(connection_test, prefix): """Function-scoped schema_any for tests that need fresh schema state.""" schema_any = dj.Schema(prefix + "_test1_fresh", schema.LOCALS_ANY, connection=connection_test) assert schema.LOCALS_ANY, "LOCALS_ANY is empty" - try: - schema_any.jobs.delete() - except DataJointError: - pass schema_any(schema.TTest) schema_any(schema.TTest2) schema_any(schema.TTest3) @@ -667,10 +639,6 @@ def schema_any_fresh(connection_test, prefix): schema_any(schema.Stimulus) schema_any(schema.Longblob) yield schema_any - try: - schema_any.jobs.delete() - except DataJointError: - pass schema_any.drop() diff --git a/tests/test_autopopulate.py b/tests/test_autopopulate.py index b22b252ee..1f1d33a84 100644 --- a/tests/test_autopopulate.py +++ b/tests/test_autopopulate.py @@ -61,17 +61,22 @@ def test_populate_key_list(clean_autopopulate, subject, experiment, trial): assert n == ret["success_count"] -def test_populate_exclude_error_and_ignore_jobs(clean_autopopulate, schema_any, subject, experiment): +def test_populate_exclude_error_and_ignore_jobs(clean_autopopulate, subject, experiment): # test simple populate assert subject, "root tables are empty" assert not experiment, "table already filled?" + # Ensure jobs table is set up by refreshing + jobs = experiment.jobs + jobs.refresh() + keys = experiment.key_source.fetch("KEY", limit=2) for idx, key in enumerate(keys): if idx == 0: - schema_any.jobs.ignore(experiment.table_name, key) + jobs.ignore(key) else: - schema_any.jobs.error(experiment.table_name, key, "") + jobs.reserve(key) + jobs.error(key, error_message="Test error") experiment.populate(reserve_jobs=True) assert len(experiment.key_source & experiment) == len(experiment.key_source) - 2 diff --git a/tests/test_jobs.py b/tests/test_jobs.py deleted file mode 100644 index 4ffc431fe..000000000 --- a/tests/test_jobs.py +++ /dev/null @@ -1,130 +0,0 @@ -import random -import string - - -import datajoint as dj -from datajoint.jobs import ERROR_MESSAGE_LENGTH, TRUNCATION_APPENDIX - -from . import schema - - -def test_reserve_job(clean_jobs, subject, schema_any): - assert subject - table_name = "fake_table" - - # reserve jobs - for key in subject.fetch("KEY"): - assert schema_any.jobs.reserve(table_name, key), "failed to reserve a job" - - # refuse jobs - for key in subject.fetch("KEY"): - assert not schema_any.jobs.reserve(table_name, key), "failed to respect reservation" - - # complete jobs - for key in subject.fetch("KEY"): - schema_any.jobs.complete(table_name, key) - assert not schema_any.jobs, "failed to free jobs" - - # reserve jobs again - for key in subject.fetch("KEY"): - assert schema_any.jobs.reserve(table_name, key), "failed to reserve new jobs" - - # finish with error - for key in subject.fetch("KEY"): - schema_any.jobs.error(table_name, key, "error message") - - # refuse jobs with errors - for key in subject.fetch("KEY"): - assert not schema_any.jobs.reserve(table_name, key), "failed to ignore error jobs" - - # clear error jobs - (schema_any.jobs & dict(status="error")).delete() - assert not schema_any.jobs, "failed to clear error jobs" - - -def test_restrictions(clean_jobs, schema_any): - jobs = schema_any.jobs - jobs.delete() - jobs.reserve("a", {"key": "a1"}) - jobs.reserve("a", {"key": "a2"}) - jobs.reserve("b", {"key": "b1"}) - jobs.error("a", {"key": "a2"}, "error") - jobs.error("b", {"key": "b1"}, "error") - - assert len(jobs & {"table_name": "a"}) == 2 - assert len(jobs & {"status": "error"}) == 2 - assert len(jobs & {"table_name": "a", "status": "error"}) == 1 - jobs.delete() - - -def test_sigint(clean_jobs, schema_any): - try: - schema.SigIntTable().populate(reserve_jobs=True) - except KeyboardInterrupt: - pass - - assert len(schema_any.jobs.fetch()), "SigInt jobs table is empty" - status, error_message = schema_any.jobs.fetch1("status", "error_message") - assert status == "error" - assert error_message == "KeyboardInterrupt" - - -def test_sigterm(clean_jobs, schema_any): - try: - schema.SigTermTable().populate(reserve_jobs=True) - except SystemExit: - pass - - assert len(schema_any.jobs.fetch()), "SigTerm jobs table is empty" - status, error_message = schema_any.jobs.fetch1("status", "error_message") - assert status == "error" - assert error_message == "SystemExit: SIGTERM received" - - -def test_suppress_dj_errors(clean_jobs, schema_any): - """test_suppress_dj_errors: dj errors suppressible w/o native py blobs""" - with dj.config.override(enable_python_native_blobs=False): - schema.ErrorClass.populate(reserve_jobs=True, suppress_errors=True) - assert len(schema.DjExceptionName()) == len(schema_any.jobs) > 0 - - -def test_long_error_message(clean_jobs, subject, schema_any): - # create long error message - long_error_message = "".join(random.choice(string.ascii_letters) for _ in range(ERROR_MESSAGE_LENGTH + 100)) - short_error_message = "".join(random.choice(string.ascii_letters) for _ in range(ERROR_MESSAGE_LENGTH // 2)) - assert subject - table_name = "fake_table" - - key = subject.fetch("KEY", limit=1)[0] - - # test long error message - schema_any.jobs.reserve(table_name, key) - schema_any.jobs.error(table_name, key, long_error_message) - error_message = schema_any.jobs.fetch1("error_message") - assert len(error_message) == ERROR_MESSAGE_LENGTH, "error message is longer than max allowed" - assert error_message.endswith(TRUNCATION_APPENDIX), "appropriate ending missing for truncated error message" - schema_any.jobs.delete() - - # test long error message - schema_any.jobs.reserve(table_name, key) - schema_any.jobs.error(table_name, key, short_error_message) - error_message = schema_any.jobs.fetch1("error_message") - assert error_message == short_error_message, "error messages do not agree" - assert not error_message.endswith(TRUNCATION_APPENDIX), "error message should not be truncated" - schema_any.jobs.delete() - - -def test_long_error_stack(clean_jobs, subject, schema_any): - # create long error stack - STACK_SIZE = 89942 # Does not fit into small blob (should be 64k, but found to be higher) - long_error_stack = "".join(random.choice(string.ascii_letters) for _ in range(STACK_SIZE)) - assert subject - table_name = "fake_table" - - key = subject.fetch("KEY", limit=1)[0] - - # test long error stack - schema_any.jobs.reserve(table_name, key) - schema_any.jobs.error(table_name, key, "error message", long_error_stack) - error_stack = schema_any.jobs.fetch1("error_stack") - assert error_stack == long_error_stack, "error stacks do not agree" From 956fa27181e83d280182817a728cfb29144ca35e Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:48:00 +0000 Subject: [PATCH 26/39] Rename jobs_v2.py to jobs.py Now that the legacy schema-wide jobs system has been removed, rename the new per-table jobs module to its canonical name: - src/datajoint/jobs_v2.py -> src/datajoint/jobs.py - tests/test_jobs_v2.py -> tests/test_jobs.py - Update imports in autopopulate.py and test_jobs.py --- src/datajoint/autopopulate.py | 2 +- src/datajoint/{jobs_v2.py => jobs.py} | 0 tests/{test_jobs_v2.py => test_jobs.py} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/datajoint/{jobs_v2.py => jobs.py} (100%) rename tests/{test_jobs_v2.py => test_jobs.py} (99%) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index b964e51d4..25f0ba344 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -171,7 +171,7 @@ def jobs(self): :return: JobsTable instance for this table """ if self._jobs_table is None: - from .jobs_v2 import JobsTable + from .jobs import JobsTable self._jobs_table = JobsTable(self.target) return self._jobs_table diff --git a/src/datajoint/jobs_v2.py b/src/datajoint/jobs.py similarity index 100% rename from src/datajoint/jobs_v2.py rename to src/datajoint/jobs.py diff --git a/tests/test_jobs_v2.py b/tests/test_jobs.py similarity index 99% rename from tests/test_jobs_v2.py rename to tests/test_jobs.py index 1c4f2acc1..25e1081e6 100644 --- a/tests/test_jobs_v2.py +++ b/tests/test_jobs.py @@ -6,7 +6,7 @@ import string import datajoint as dj -from datajoint.jobs_v2 import JobsTable, ERROR_MESSAGE_LENGTH, TRUNCATION_APPENDIX +from datajoint.jobs import JobsTable, ERROR_MESSAGE_LENGTH, TRUNCATION_APPENDIX from . import schema From 608020ab1672875b31817286b93d489bdc3fb5bf Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 00:59:07 +0000 Subject: [PATCH 27/39] Improve jobs.py: use update1, djblob, cleaner f-string - Use variable assignment for pk_section instead of chr(10) in f-string - Change error_stack type from mediumblob to - Use update1() in error() instead of raw SQL and deprecated _update() - Remove config.override(enable_python_native_blobs=True) wrapper Note: reserve() keeps raw SQL for atomic conditional update with rowcount check - this is required for safe concurrent job reservation. --- src/datajoint/jobs.py | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 9bccd3e40..98e259140 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -9,6 +9,7 @@ import logging import os import platform +from datetime import datetime from typing import TYPE_CHECKING from .errors import DataJointError, DuplicateError @@ -134,10 +135,10 @@ def _build_definition(self) -> str: ) # Build primary key section - pk_lines = [attr_def for _, attr_def in pk_attrs] + pk_section = "\n".join(attr_def for _, attr_def in pk_attrs) definition = f"""# Job queue for {self._target.class_name} -{chr(10).join(pk_lines)} +{pk_section} --- status : enum('pending', 'reserved', 'success', 'error', 'ignore') priority : int # Lower = more urgent (0 = highest priority) @@ -147,7 +148,7 @@ def _build_definition(self) -> str: completed_time=null : datetime(6) # When job completed duration=null : float # Execution duration in seconds error_message="" : varchar({ERROR_MESSAGE_LENGTH}) # Error message if failed -error_stack=null : mediumblob # Full error traceback +error_stack=null : # Full error traceback user="" : varchar(255) # Database user who reserved/completed job host="" : varchar(255) # Hostname of worker pid=0 : int unsigned # Process ID of worker @@ -417,27 +418,17 @@ def error(self, key: dict, error_message: str, error_stack: str = None) -> None: pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] job_key = {attr: key[attr] for attr in pk_attrs if attr in key} - key_conditions = " AND ".join( - f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) else f"`{attr}`={job_key[attr]}" - for attr in pk_attrs - ) - - # Escape error message for SQL - error_message_escaped = error_message.replace("'", "''").replace("\\", "\\\\") - - sql = f""" - UPDATE {self.full_table_name} - SET status='error', - completed_time=NOW(6), - error_message='{error_message_escaped}' - WHERE {key_conditions} - """ - self.connection.query(sql) - - # Update error_stack separately using parameterized query if provided + # Build update dict with all required fields + update_row = { + **job_key, + "status": "error", + "completed_time": datetime.now(), + "error_message": error_message, + } if error_stack is not None: - with config.override(enable_python_native_blobs=True): - (self & job_key)._update("error_stack", error_stack) + update_row["error_stack"] = error_stack + + self.update1(update_row) def ignore(self, key: dict) -> None: """ From 8430e2adb1c56b2626d2f0c989c9b694793e33f9 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:03:19 +0000 Subject: [PATCH 28/39] Simplify reserve() to use update1 - reserve() now uses update1 instead of raw SQL - Remove status='pending' check since populate verifies this - Change return type from bool to None - Update autopopulate.py to not check reserve return value - Update tests to reflect new behavior --- src/datajoint/autopopulate.py | 6 +++--- src/datajoint/jobs.py | 40 +++++++++++++---------------------- tests/test_jobs.py | 26 +++++++++++------------ 3 files changed, 30 insertions(+), 42 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 25f0ba344..1249b472e 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -390,9 +390,9 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ job_key = self._job_key(key) start_time = time.time() - # Try to reserve the job (per-key, before make) - if jobs is not None and not jobs.reserve(job_key): - return False + # Reserve the job (per-key, before make) + if jobs is not None: + jobs.reserve(job_key) # if make is a generator, transaction can be delayed until the final stage is_generator = inspect.isgeneratorfunction(make) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 98e259140..f8ed4a486 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -328,41 +328,31 @@ def _insert_job_with_delay(self, key: dict, priority: int, delay: float) -> None """ self.connection.query(sql) - def reserve(self, key: dict) -> bool: + def reserve(self, key: dict) -> None: """ - Attempt to reserve a job for processing. + Reserve a job for processing. - Updates status to 'reserved' if currently 'pending' and scheduled_time <= now. + Updates the job record to 'reserved' status. The caller (populate) is + responsible for verifying the job is pending before calling this method. Args: key: Primary key dict for the job - - Returns: - True if reservation successful, False if job not found or not pending. """ self._ensure_declared() - # Build WHERE clause for the key pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] - key_conditions = " AND ".join( - f"`{attr}`='{key[attr]}'" if isinstance(key[attr], str) else f"`{attr}`={key[attr]}" for attr in pk_attrs - ) + job_key = {attr: key[attr] for attr in pk_attrs if attr in key} - # Attempt atomic update: pending -> reserved - sql = f""" - UPDATE {self.full_table_name} - SET status='reserved', - reserved_time=NOW(6), - user='{self._user}', - host='{platform.node()}', - pid={os.getpid()}, - connection_id={self.connection.connection_id} - WHERE {key_conditions} - AND status='pending' - AND scheduled_time <= NOW(6) - """ - result = self.connection.query(sql) - return result.rowcount > 0 + update_row = { + **job_key, + "status": "reserved", + "reserved_time": datetime.now(), + "user": self._user, + "host": platform.node(), + "pid": os.getpid(), + "connection_id": self.connection.connection_id, + } + self.update1(update_row) def complete(self, key: dict, duration: float = None, keep: bool = None) -> None: """ diff --git a/tests/test_jobs.py b/tests/test_jobs.py index 25e1081e6..1925eb4b5 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -97,32 +97,30 @@ def test_reserve_pending_job(self, schema_any): # Get first pending job key = jobs.pending.fetch("KEY", limit=1)[0] - assert jobs.reserve(key) + jobs.reserve(key) # Verify status changed status = (jobs & key).fetch1("status") assert status == "reserved" - def test_reserve_already_reserved(self, schema_any): - """Test that reserve() returns False for already reserved job.""" + def test_reserve_sets_metadata(self, schema_any): + """Test that reserve() sets user, host, pid, connection_id.""" table = schema.SigIntTable() jobs = table.jobs jobs.delete() jobs.refresh() key = jobs.pending.fetch("KEY", limit=1)[0] - assert jobs.reserve(key) - assert not jobs.reserve(key) # Second reserve should fail - - def test_reserve_scheduled_future(self, schema_any): - """Test that reserve() fails for jobs scheduled in the future.""" - table = schema.SigIntTable() - jobs = table.jobs - jobs.delete() - jobs.refresh(delay=3600) # 1 hour delay + jobs.reserve(key) - key = jobs.fetch("KEY", limit=1)[0] - assert not jobs.reserve(key) # Should fail - not yet scheduled + # Verify metadata was set + row = (jobs & key).fetch1() + assert row["status"] == "reserved" + assert row["reserved_time"] is not None + assert row["user"] != "" + assert row["host"] != "" + assert row["pid"] > 0 + assert row["connection_id"] > 0 class TestJobsComplete: From 34c302ae4c83e89f56d5535179382473113a6d26 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:05:56 +0000 Subject: [PATCH 29/39] Use update1 in complete() method --- src/datajoint/jobs.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index f8ed4a486..4a1f3b5fa 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -374,18 +374,14 @@ def complete(self, key: dict, duration: float = None, keep: bool = None) -> None if keep: # Update to success status - duration_sql = f", duration={duration}" if duration is not None else "" - key_conditions = " AND ".join( - f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) else f"`{attr}`={job_key[attr]}" - for attr in pk_attrs - ) - sql = f""" - UPDATE {self.full_table_name} - SET status='success', - completed_time=NOW(6){duration_sql} - WHERE {key_conditions} - """ - self.connection.query(sql) + update_row = { + **job_key, + "status": "success", + "completed_time": datetime.now(), + } + if duration is not None: + update_row["duration"] = duration + self.update1(update_row) else: # Delete the job entry (self & job_key).delete_quick() From e0d6fd9d42dbba472d23dec32da86c2836441367 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:10:26 +0000 Subject: [PATCH 30/39] Simplify: use self.proj() for jobs table projections --- src/datajoint/jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 4a1f3b5fa..b83edeb16 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -265,7 +265,7 @@ def refresh( # Project to FK-derived attributes only key_source_proj = key_source.proj(*pk_attrs) target_proj = self._target.proj(*pk_attrs) - existing_jobs = self.proj(*pk_attrs) + existing_jobs = self.proj() # jobs table PK is the FK-derived attrs # Keys that need jobs: in key_source, not in target, not already in jobs new_keys = (key_source_proj - target_proj - existing_jobs).fetch("KEY") @@ -285,7 +285,7 @@ def refresh( removed = 0 if stale_timeout > 0: stale_condition = f'status="pending" AND ' f"created_time < NOW() - INTERVAL {stale_timeout} SECOND" - stale_jobs = (self & stale_condition).proj(*pk_attrs) + stale_jobs = (self & stale_condition).proj() # Check which stale jobs are no longer in key_source orphaned_keys = (stale_jobs - key_source_proj).fetch("KEY") From 83b7f49d80fe2f405c2379aeb58d83e9b63d008a Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:12:57 +0000 Subject: [PATCH 31/39] Simplify ignore(): only insert new records, cannot convert existing --- src/datajoint/jobs.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index b83edeb16..061048465 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -420,7 +420,8 @@ def ignore(self, key: dict) -> None: """ Mark a key to be ignored (skipped during populate). - Can be called on keys not yet in the jobs table. + Only inserts new records. Existing job entries cannot be converted to + ignore status - they must be cleared first. Args: key: Primary key dict for the job @@ -430,22 +431,10 @@ def ignore(self, key: dict) -> None: pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] job_key = {attr: key[attr] for attr in pk_attrs if attr in key} - # Check if job already exists - if job_key in self: - # Update existing job to ignore - key_conditions = " AND ".join( - f"`{attr}`='{job_key[attr]}'" if isinstance(job_key[attr], str) else f"`{attr}`={job_key[attr]}" - for attr in pk_attrs - ) - sql = f""" - UPDATE {self.full_table_name} - SET status='ignore' - WHERE {key_conditions} - """ - self.connection.query(sql) - else: - # Insert new job with ignore status + try: self._insert_job_with_status(job_key, "ignore") + except DuplicateError: + pass # Already tracked def _insert_job_with_status(self, key: dict, status: str) -> None: """Insert a new job with the given status.""" From 080b6c0972e828b3fc58cf25c36f9d9e98356b07 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:14:32 +0000 Subject: [PATCH 32/39] Use insert1 in _insert_job_with_status instead of explicit SQL --- src/datajoint/jobs.py | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 061048465..7dff66333 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -438,27 +438,19 @@ def ignore(self, key: dict) -> None: def _insert_job_with_status(self, key: dict, status: str) -> None: """Insert a new job with the given status.""" - pk_attrs = [name for name, _ in self._get_fk_derived_primary_key()] - columns = pk_attrs + ["status", "priority", "created_time", "scheduled_time", "user", "host", "pid", "connection_id"] - - pk_values = [f"'{key[attr]}'" if isinstance(key[attr], str) else str(key[attr]) for attr in pk_attrs] - other_values = [ - f"'{status}'", - str(DEFAULT_PRIORITY), - "NOW(6)", - "NOW(6)", - f"'{self._user}'", - f"'{platform.node()}'", - str(os.getpid()), - str(self.connection.connection_id), - ] - - sql = f""" - INSERT INTO {self.full_table_name} - ({', '.join(f'`{c}`' for c in columns)}) - VALUES ({', '.join(pk_values + other_values)}) - """ - self.connection.query(sql) + now = datetime.now() + row = { + **key, + "status": status, + "priority": DEFAULT_PRIORITY, + "created_time": now, + "scheduled_time": now, + "user": self._user, + "host": platform.node(), + "pid": os.getpid(), + "connection_id": self.connection.connection_id, + } + self.insert1(row) def progress(self) -> dict: """ From 84ba4b7ae97da40c65aa67b6cc5ef20544144467 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:25:56 +0000 Subject: [PATCH 33/39] Remove AutoPopulate._job_key - no longer needed --- src/datajoint/autopopulate.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 1249b472e..096a3c11a 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -176,14 +176,6 @@ def jobs(self): self._jobs_table = JobsTable(self.target) return self._jobs_table - def _job_key(self, key): - """ - :param key: they key returned for the job from the key source - :return: the dict to use to generate the job reservation hash - This method allows subclasses to control the job reservation granularity. - """ - return key - def _jobs_to_do(self, restrictions): """ :return: the query yielding the keys to be computed (derived from self.key_source) @@ -387,12 +379,11 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ # use the legacy `_make_tuples` callback. make = self._make_tuples if hasattr(self, "_make_tuples") else self.make - job_key = self._job_key(key) start_time = time.time() # Reserve the job (per-key, before make) if jobs is not None: - jobs.reserve(job_key) + jobs.reserve(key) # if make is a generator, transaction can be delayed until the final stage is_generator = inspect.isgeneratorfunction(make) @@ -404,7 +395,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ self.connection.cancel_transaction() if jobs is not None: # Job already done - mark complete or delete - jobs.complete(job_key, duration=0) + jobs.complete(key, duration=0) return False logger.debug(f"Making {key} -> {self.target.full_table_name}") @@ -449,11 +440,11 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ # This is not a real error, just coordination artifact logger.debug(f"Duplicate key collision for {key}, reverting job") # Delete the reservation, letting the job be picked up again or cleaned - (jobs & job_key).delete_quick() + (jobs & key).delete_quick() else: # Real error inside make() - log it jobs.error( - job_key, + key, error_message=error_message, error_stack=traceback.format_exc(), ) @@ -467,7 +458,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ duration = time.time() - start_time logger.debug(f"Success making {key} -> {self.target.full_table_name}") if jobs is not None: - jobs.complete(job_key, duration=duration) + jobs.complete(key, duration=duration) return True finally: self.__class__._allow_insert = False From 6ef2de7bb5b9f2765b012306b5c1eecf572bada4 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:29:27 +0000 Subject: [PATCH 34/39] Remove AutoPopulate.target property The new implementation always populates self - the target property is no longer needed. All references to self.target replaced with self. --- src/datajoint/autopopulate.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 096a3c11a..931d65630 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -74,7 +74,7 @@ def _rename_attributes(table, props): ) if self._key_source is None: - parents = self.target.parents(primary=True, as_objects=True, foreign_key_info=True) + parents = self.parents(primary=True, as_objects=True, foreign_key_info=True) if not parents: raise DataJointError("A table must have dependencies from its primary key for auto-populate to work") self._key_source = _rename_attributes(*parents[0]) @@ -151,15 +151,6 @@ def make(self, key): self.make_insert(key, *computed_result) yield - @property - def target(self): - """ - :return: table to be populated. - In the typical case, dj.AutoPopulate is mixed into a dj.Table class by - inheritance and the target is self. - """ - return self - @property def jobs(self): """ @@ -173,7 +164,7 @@ def jobs(self): if self._jobs_table is None: from .jobs import JobsTable - self._jobs_table = JobsTable(self.target) + self._jobs_table = JobsTable(self) return self._jobs_table def _jobs_to_do(self, restrictions): @@ -198,7 +189,7 @@ def _jobs_to_do(self, restrictions): raise DataJointError( "The populate target lacks attribute %s " "from the primary key of key_source" - % next(name for name in todo.heading.primary_key if name not in self.target.heading) + % next(name for name in todo.heading.primary_key if name not in self.heading) ) except StopIteration: pass @@ -281,7 +272,7 @@ def handler(signum, frame): else: # Legacy behavior: get keys from key_source if keys is None: - keys = (self._jobs_to_do(restrictions) - self.target).fetch("KEY", limit=limit) + keys = (self._jobs_to_do(restrictions) - self).fetch("KEY", limit=limit) if order == "reverse": keys.reverse() @@ -390,7 +381,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ if not is_generator: self.connection.start_transaction() - if key in self.target: # already populated + if key in self: # already populated if not is_generator: self.connection.cancel_transaction() if jobs is not None: @@ -398,7 +389,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ jobs.complete(key, duration=0) return False - logger.debug(f"Making {key} -> {self.target.full_table_name}") + logger.debug(f"Making {key} -> {self.full_table_name}") self.__class__._allow_insert = True try: @@ -429,7 +420,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ exception=error.__class__.__name__, msg=": " + str(error) if str(error) else "", ) - logger.debug(f"Error making {key} -> {self.target.full_table_name} - {error_message}") + logger.debug(f"Error making {key} -> {self.full_table_name} - {error_message}") # Only log errors from inside make() - not collision errors if jobs is not None: @@ -456,7 +447,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ else: self.connection.commit_transaction() duration = time.time() - start_time - logger.debug(f"Success making {key} -> {self.target.full_table_name}") + logger.debug(f"Success making {key} -> {self.full_table_name}") if jobs is not None: jobs.complete(key, duration=duration) return True @@ -470,7 +461,7 @@ def progress(self, *restrictions, display=False): """ todo = self._jobs_to_do(restrictions) total = len(todo) - remaining = len(todo - self.target) + remaining = len(todo - self) if display: logger.info( "%-20s" % self.__class__.__name__ From 55d7f32b4f6870b3647e4baa40d3d09dc40dd62a Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:36:10 +0000 Subject: [PATCH 35/39] Remove legacy _make_tuples callback support - use self.make exclusively --- src/datajoint/autopopulate.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 931d65630..a80de1f91 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -368,8 +368,6 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ """ import time - # use the legacy `_make_tuples` callback. - make = self._make_tuples if hasattr(self, "_make_tuples") else self.make start_time = time.time() # Reserve the job (per-key, before make) @@ -377,7 +375,7 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ jobs.reserve(key) # if make is a generator, transaction can be delayed until the final stage - is_generator = inspect.isgeneratorfunction(make) + is_generator = inspect.isgeneratorfunction(self.make) if not is_generator: self.connection.start_transaction() @@ -394,16 +392,16 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_ try: if not is_generator: - make(dict(key), **(make_kwargs or {})) + self.make(dict(key), **(make_kwargs or {})) else: # tripartite make - transaction is delayed until the final stage - gen = make(dict(key), **(make_kwargs or {})) + gen = self.make(dict(key), **(make_kwargs or {})) fetched_data = next(gen) fetch_hash = deepdiff.DeepHash(fetched_data, ignore_iterable_order=False)[fetched_data] computed_result = next(gen) # perform the computation # fetch and insert inside a transaction self.connection.start_transaction() - gen = make(dict(key), **(make_kwargs or {})) # restart make + gen = self.make(dict(key), **(make_kwargs or {})) # restart make fetched_data = next(gen) if ( fetch_hash != deepdiff.DeepHash(fetched_data, ignore_iterable_order=False)[fetched_data] From 7b28c645abce2d0ad6ff1efef14ee79e9dd8962d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:45:09 +0000 Subject: [PATCH 36/39] Eliminate _jobs_to_do method - Inline the logic directly in populate() and progress() - Move restriction check to populate() - Use (self.key_source & AndList(restrictions)).proj() directly - Remove unused QueryExpression import --- src/datajoint/autopopulate.py | 40 ++++++++--------------------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index a80de1f91..596bfae2e 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -13,7 +13,7 @@ from tqdm import tqdm from .errors import DataJointError, LostConnectionError -from .expression import AndList, QueryExpression +from .expression import AndList # noinspection PyExceptionInherit,PyCallingNonCallable @@ -167,34 +167,6 @@ def jobs(self): self._jobs_table = JobsTable(self) return self._jobs_table - def _jobs_to_do(self, restrictions): - """ - :return: the query yielding the keys to be computed (derived from self.key_source) - """ - if self.restriction: - raise DataJointError( - "Cannot call populate on a restricted table. Instead, pass conditions to populate() as arguments." - ) - todo = self.key_source - - # key_source is a QueryExpression subclass -- trigger instantiation - if inspect.isclass(todo) and issubclass(todo, QueryExpression): - todo = todo() - - if not isinstance(todo, QueryExpression): - raise DataJointError("Invalid key_source value") - - try: - # check if target lacks any attributes from the primary key of key_source - raise DataJointError( - "The populate target lacks attribute %s " - "from the primary key of key_source" - % next(name for name in todo.heading.primary_key if name not in self.heading) - ) - except StopIteration: - pass - return (todo & AndList(restrictions)).proj() - def populate( self, *restrictions, @@ -243,6 +215,11 @@ def populate( if self.connection.in_transaction: raise DataJointError("Populate cannot be called during a transaction.") + if self.restriction: + raise DataJointError( + "Cannot call populate on a restricted table. " "Instead, pass conditions to populate() as arguments." + ) + valid_order = ["original", "reverse", "random"] if order not in valid_order: raise DataJointError("The order argument must be one of %s" % str(valid_order)) @@ -272,7 +249,8 @@ def handler(signum, frame): else: # Legacy behavior: get keys from key_source if keys is None: - keys = (self._jobs_to_do(restrictions) - self).fetch("KEY", limit=limit) + todo = (self.key_source & AndList(restrictions)).proj() + keys = (todo - self).fetch("KEY", limit=limit) if order == "reverse": keys.reverse() @@ -457,7 +435,7 @@ def progress(self, *restrictions, display=False): Report the progress of populating the table. :return: (remaining, total) -- numbers of tuples to be populated """ - todo = self._jobs_to_do(restrictions) + todo = (self.key_source & AndList(restrictions)).proj() total = len(todo) remaining = len(todo - self) if display: From d28fa7c44060e398bac3bf85d8765894df537d3e Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:52:04 +0000 Subject: [PATCH 37/39] Simplify jobs variable usage in populate() - Remove early jobs_table assignment, use self.jobs directly - Fix comment: key_source is correct behavior, not legacy - Use self.jobs directly in _get_pending_jobs --- src/datajoint/autopopulate.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 596bfae2e..a028b8c59 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -224,9 +224,6 @@ def populate( if order not in valid_order: raise DataJointError("The order argument must be one of %s" % str(valid_order)) - # Get the jobs table (per-table JobsTable for new system) - jobs_table = self.jobs if reserve_jobs else None - if reserve_jobs: # Define a signal handler for SIGTERM def handler(signum, frame): @@ -247,7 +244,7 @@ def handler(signum, frame): refresh=refresh, ) else: - # Legacy behavior: get keys from key_source + # Without job reservations: compute keys directly from key_source if keys is None: todo = (self.key_source & AndList(restrictions)).proj() keys = (todo - self).fetch("KEY", limit=limit) @@ -271,9 +268,11 @@ def handler(signum, frame): make_kwargs=make_kwargs, ) + jobs = self.jobs if reserve_jobs else None + if processes == 1: for key in tqdm(keys, desc=self.__class__.__name__) if display_progress else keys: - status = self._populate1(key, jobs_table, **populate_kwargs) + status = self._populate1(key, jobs, **populate_kwargs) if status is True: success_list.append(1) elif isinstance(status, tuple): @@ -285,7 +284,7 @@ def handler(signum, frame): self.connection.close() # disconnect parent process from MySQL server del self.connection._conn.ctx # SSLContext is not pickleable with ( - mp.Pool(processes, _initialize_populate, (self, jobs_table, populate_kwargs)) as pool, + mp.Pool(processes, _initialize_populate, (self, jobs, populate_kwargs)) as pool, tqdm(desc="Processes: ", total=nkeys) if display_progress else contextlib.nullcontext() as progress_bar, ): for status in pool.imap(_call_populate1, keys, chunksize=1): @@ -321,16 +320,14 @@ def _get_pending_jobs(self, restrictions, priority, limit, refresh): :param refresh: Whether to refresh if no pending jobs found :return: List of key dicts """ - jobs_table = self.jobs - # First, try to get pending jobs - keys = jobs_table.fetch_pending(limit=limit, priority=priority) + keys = self.jobs.fetch_pending(limit=limit, priority=priority) # If no pending jobs and refresh is enabled, refresh and try again if not keys and refresh: logger.debug("No pending jobs found, refreshing jobs queue") - jobs_table.refresh(*restrictions) - keys = jobs_table.fetch_pending(limit=limit, priority=priority) + self.jobs.refresh(*restrictions) + keys = self.jobs.fetch_pending(limit=limit, priority=priority) return keys From 7d595fbea0f272d34ebd29dbb3c2f48eb94f0495 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 01:59:05 +0000 Subject: [PATCH 38/39] Inline _get_pending_jobs into populate() Method only called from one place, no need for separate function. --- src/datajoint/autopopulate.py | 37 ++++++----------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index a028b8c59..23adf3eb5 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -236,13 +236,12 @@ def handler(signum, frame): success_list = [] if reserve_jobs: - # New Autopopulate 2.0 logic: use jobs table - keys = self._get_pending_jobs( - restrictions=restrictions, - priority=priority, - limit=limit, - refresh=refresh, - ) + # Use jobs table for coordinated processing + keys = self.jobs.fetch_pending(limit=limit, priority=priority) + if not keys and refresh: + logger.debug("No pending jobs found, refreshing jobs queue") + self.jobs.refresh(*restrictions) + keys = self.jobs.fetch_pending(limit=limit, priority=priority) else: # Without job reservations: compute keys directly from key_source if keys is None: @@ -307,30 +306,6 @@ def handler(signum, frame): "error_list": error_list, } - def _get_pending_jobs(self, restrictions, priority, limit, refresh): - """ - Get pending jobs from the jobs table. - - If no pending jobs are found and refresh=True, refreshes the jobs queue - and tries again. - - :param restrictions: Restrictions to apply when refreshing - :param priority: Only get jobs at this priority or more urgent - :param limit: Maximum number of jobs to return - :param refresh: Whether to refresh if no pending jobs found - :return: List of key dicts - """ - # First, try to get pending jobs - keys = self.jobs.fetch_pending(limit=limit, priority=priority) - - # If no pending jobs and refresh is enabled, refresh and try again - if not keys and refresh: - logger.debug("No pending jobs found, refreshing jobs queue") - self.jobs.refresh(*restrictions) - keys = self.jobs.fetch_pending(limit=limit, priority=priority) - - return keys - def _populate1(self, key, jobs, suppress_errors, return_exception_objects, make_kwargs=None): """ populates table for one source key, calling self.make inside a transaction. From 0a5f3a956a1575e347193d42f4d0a7d53ea9c00d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 02:01:34 +0000 Subject: [PATCH 39/39] Remove order parameter and consolidate limit/max_calls - Remove 'order' parameter (conflicts with priority/scheduled_time) - Remove 'limit' parameter, keep only 'max_calls' for simplicity - Remove unused 'random' import --- src/datajoint/autopopulate.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 23adf3eb5..c90116a74 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -5,7 +5,6 @@ import inspect import logging import multiprocessing as mp -import random import signal import traceback @@ -174,13 +173,10 @@ def populate( suppress_errors=False, return_exception_objects=False, reserve_jobs=False, - order="original", - limit=None, max_calls=None, display_progress=False, processes=1, make_kwargs=None, - # New parameters for Autopopulate 2.0 priority=None, refresh=True, ): @@ -195,8 +191,6 @@ def populate( :param suppress_errors: if True, do not terminate execution. :param return_exception_objects: return error objects instead of just error messages :param reserve_jobs: if True, reserve jobs to populate in asynchronous fashion - :param order: "original"|"reverse"|"random" - the order of execution - :param limit: if not None, check at most this many keys :param max_calls: if not None, populate at most this many keys :param display_progress: if True, report progress_bar :param processes: number of processes to use. Set to None to use all cores @@ -220,10 +214,6 @@ def populate( "Cannot call populate on a restricted table. " "Instead, pass conditions to populate() as arguments." ) - valid_order = ["original", "reverse", "random"] - if order not in valid_order: - raise DataJointError("The order argument must be one of %s" % str(valid_order)) - if reserve_jobs: # Define a signal handler for SIGTERM def handler(signum, frame): @@ -237,25 +227,18 @@ def handler(signum, frame): if reserve_jobs: # Use jobs table for coordinated processing - keys = self.jobs.fetch_pending(limit=limit, priority=priority) + keys = self.jobs.fetch_pending(limit=max_calls, priority=priority) if not keys and refresh: logger.debug("No pending jobs found, refreshing jobs queue") self.jobs.refresh(*restrictions) - keys = self.jobs.fetch_pending(limit=limit, priority=priority) + keys = self.jobs.fetch_pending(limit=max_calls, priority=priority) else: # Without job reservations: compute keys directly from key_source if keys is None: todo = (self.key_source & AndList(restrictions)).proj() - keys = (todo - self).fetch("KEY", limit=limit) - - if order == "reverse": - keys.reverse() - elif order == "random": - random.shuffle(keys) + keys = (todo - self).fetch("KEY", limit=max_calls) logger.debug("Found %d keys to populate" % len(keys)) - - keys = keys[:max_calls] nkeys = len(keys) if nkeys: