From 9f28b102bd18785dc34a6e5402b56dd9998a3fe9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 7 Jan 2026 22:40:53 -0600 Subject: [PATCH 01/17] fix: enable auto_mkdir for local filesystem in StorageBackend The fsspec local filesystem needs auto_mkdir=True to automatically create parent directories when writing files. This is required for Zarr 3.x which doesn't create directories when opening arrays with mode='w' through an FSMap. Co-Authored-By: Claude Opus 4.5 --- src/datajoint/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datajoint/storage.py b/src/datajoint/storage.py index 0d401dbdf..6dacbd7ec 100644 --- a/src/datajoint/storage.py +++ b/src/datajoint/storage.py @@ -287,7 +287,7 @@ def fs(self) -> fsspec.AbstractFileSystem: def _create_filesystem(self) -> fsspec.AbstractFileSystem: """Create fsspec filesystem based on protocol.""" if self.protocol == "file": - return fsspec.filesystem("file") + return fsspec.filesystem("file", auto_mkdir=True) elif self.protocol == "s3": # Build S3 configuration From 9737deef97b2717d4c788b149a399802bedf48fc Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 00:52:23 -0600 Subject: [PATCH 02/17] fix(Top): allow order_by=None to inherit existing ordering (#1242) When chaining dj.Top restrictions, the second Top can now inherit the ordering from the first by using order_by=None: (Table & dj.Top(100, "score DESC")) & dj.Top(10, order_by=None) # Returns top 10 of top 100, ordered by score descending This fixes an issue where preview (to_arrays with limit) would override user-specified ordering with the default KEY ordering. Changes: - Top class now accepts order_by=None to mean "inherit" - Added Top.merge() method for combining Tops with compatible ordering - restrict() now merges Tops when order_by=None or identical - Creates subquery only when orderings differ Merge behavior: - limit = min(existing_limit, new_limit) - offset = existing_offset + new_offset - order_by = preserved from existing Top Co-Authored-By: Claude Opus 4.5 --- src/datajoint/condition.py | 61 ++++++++++--- src/datajoint/expression.py | 20 +++-- tests/integration/test_relational_operand.py | 46 ++++++++++ tests/unit/test_condition.py | 95 ++++++++++++++++++++ 4 files changed, 207 insertions(+), 15 deletions(-) create mode 100644 tests/unit/test_condition.py diff --git a/src/datajoint/condition.py b/src/datajoint/condition.py index 8ab19ca5d..ea7d7a504 100644 --- a/src/datajoint/condition.py +++ b/src/datajoint/condition.py @@ -107,32 +107,73 @@ class Top: ---------- limit : int, optional Maximum number of rows to return. Default 1. - order_by : str or list[str], optional - Attributes to order by. ``"KEY"`` for primary key. Default ``"KEY"``. + order_by : str or list[str] or None, optional + Attributes to order by. ``"KEY"`` for primary key order. + ``None`` means inherit ordering from an existing Top (or default to KEY). + Default ``"KEY"``. offset : int, optional Number of rows to skip. Default 0. 
+ + Examples + -------- + >>> query & dj.Top(5) # Top 5 by primary key + >>> query & dj.Top(10, 'score DESC') # Top 10 by score descending + >>> query & dj.Top(10, order_by=None) # Top 10, inherit existing order + >>> query & dj.Top(5, offset=10) # Skip 10, take 5 """ limit: int | None = 1 - order_by: str | list[str] = "KEY" + order_by: str | list[str] | None = "KEY" offset: int = 0 def __post_init__(self) -> None: - self.order_by = self.order_by or ["KEY"] self.offset = self.offset or 0 if self.limit is not None and not isinstance(self.limit, int): raise TypeError("Top limit must be an integer") - if not isinstance(self.order_by, (str, collections.abc.Sequence)) or not all( - isinstance(r, str) for r in self.order_by - ): - raise TypeError("Top order_by attributes must all be strings") + if self.order_by is not None: + if not isinstance(self.order_by, (str, collections.abc.Sequence)) or not all( + isinstance(r, str) for r in self.order_by + ): + raise TypeError("Top order_by attributes must all be strings") + if isinstance(self.order_by, str): + self.order_by = [self.order_by] if not isinstance(self.offset, int): raise TypeError("The offset argument must be an integer") if self.offset and self.limit is None: self.limit = 999999999999 # arbitrary large number to allow query - if isinstance(self.order_by, str): - self.order_by = [self.order_by] + + def merge(self, other: "Top") -> "Top": + """ + Merge another Top into this one (when other inherits ordering). + + Used when ``other.order_by`` is None or matches ``self.order_by``. + + Parameters + ---------- + other : Top + The Top to merge. Its order_by should be None or equal to self.order_by. + + Returns + ------- + Top + New Top with merged limit/offset and preserved ordering. + """ + # Compute effective limit (minimum of defined limits) + if self.limit is None and other.limit is None: + new_limit = None + elif self.limit is None: + new_limit = other.limit + elif other.limit is None: + new_limit = self.limit + else: + new_limit = min(self.limit, other.limit) + + return Top( + limit=new_limit, + order_by=self.order_by, # preserve existing ordering + offset=self.offset + other.offset, # offsets add + ) class Not: diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index feb4a7d97..971a1ee5e 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -132,7 +132,9 @@ def where_clause(self): def sorting_clauses(self): if not self._top: return "" - clause = ", ".join(_wrap_attributes(_flatten_attribute_list(self.primary_key, self._top.order_by))) + # Default to KEY ordering if order_by is None (inherit with no existing order) + order_by = self._top.order_by if self._top.order_by is not None else ["KEY"] + clause = ", ".join(_wrap_attributes(_flatten_attribute_list(self.primary_key, order_by))) if clause: clause = f" ORDER BY {clause}" if self._top.limit is not None: @@ -216,10 +218,18 @@ def restrict(self, restriction, semantic_check=True): """ attributes = set() if isinstance(restriction, Top): - result = ( - self.make_subquery() if self._top and not self._top.__eq__(restriction) else copy.copy(self) - ) # make subquery to avoid overwriting existing Top - result._top = restriction + if self._top is None: + # No existing Top — apply new one directly + result = copy.copy(self) + result._top = restriction + elif restriction.order_by is None or restriction.order_by == self._top.order_by: + # Merge: new Top inherits or matches existing ordering + result = copy.copy(self) + result._top = 
self._top.merge(restriction) + else: + # Different ordering — need subquery + result = self.make_subquery() + result._top = restriction return result new_condition = make_condition(self, restriction, attributes, semantic_check=semantic_check) if new_condition is True: diff --git a/tests/integration/test_relational_operand.py b/tests/integration/test_relational_operand.py index 3b1586785..32fcc50d2 100644 --- a/tests/integration/test_relational_operand.py +++ b/tests/integration/test_relational_operand.py @@ -627,3 +627,49 @@ def test_top_errors(self, schema_simp_pop): assert "TypeError: Top limit must be an integer" == str(err3.exconly()) assert "TypeError: Top order_by attributes must all be strings" == str(err4.exconly()) assert "TypeError: The offset argument must be an integer" == str(err5.exconly()) + + def test_top_inherit_order(self, schema_simp_pop): + """Test that dj.Top(order_by=None) inherits existing ordering.""" + # First Top sets descending order, second Top inherits it + query = L() & dj.Top(10, "id_l desc") & dj.Top(3, order_by=None) + result = query.to_dicts() + assert len(result) == 3 + # Should be top 3 by descending id_l (L has ids 0-29) + assert result[0]["id_l"] > result[1]["id_l"] > result[2]["id_l"] + assert [r["id_l"] for r in result] == [29, 28, 27] + + def test_top_merge_identical_order(self, schema_simp_pop): + """Test that Tops with identical order_by are merged.""" + # Both Tops specify same ordering - should merge + query = L() & dj.Top(10, "id_l desc") & dj.Top(5, "id_l desc") + result = query.to_dicts() + # Merged limit is min(10, 5) = 5 + assert len(result) == 5 + assert [r["id_l"] for r in result] == [29, 28, 27, 26, 25] + + def test_top_merge_offsets_add(self, schema_simp_pop): + """Test that offsets are added when merging Tops.""" + # First Top: offset 2, second Top: offset 3, inherited order + query = L() & dj.Top(10, "id_l desc", offset=2) & dj.Top(3, order_by=None, offset=3) + result = query.to_dicts() + # Total offset = 2 + 3 = 5, so starts at 6th element (id_l=24) + assert len(result) == 3 + assert [r["id_l"] for r in result] == [24, 23, 22] + + def test_preview_respects_order(self, schema_simp_pop): + """Test that preview (to_arrays with limit) respects Top ordering (issue #1242).""" + # Apply descending order with no limit (None = unlimited) + query = L() & dj.Top(None, order_by="id_l desc") + # Preview should respect the ordering (single attr returns array directly) + id_l = query.to_arrays("id_l", limit=5) + assert list(id_l) == [29, 28, 27, 26, 25] + + def test_top_different_order_subquery(self, schema_simp_pop): + """Test that different orderings create subquery.""" + # First Top: descending, second Top: ascending - cannot merge + query = L() & dj.Top(10, "id_l desc") & dj.Top(3, "id_l asc") + result = query.to_dicts() + # Second Top reorders the result of first Top + # First Top gives ids 29-20, second Top takes lowest 3 of those + assert len(result) == 3 + assert [r["id_l"] for r in result] == [20, 21, 22] diff --git a/tests/unit/test_condition.py b/tests/unit/test_condition.py new file mode 100644 index 000000000..3200e34c4 --- /dev/null +++ b/tests/unit/test_condition.py @@ -0,0 +1,95 @@ +"""Unit tests for condition.py - Top class and merge logic.""" + +import pytest +from datajoint.condition import Top + + +class TestTopMerge: + """Tests for Top.merge() method.""" + + def test_merge_inherits_order(self): + """When other.order_by is None, ordering is inherited.""" + top1 = Top(limit=10, order_by="score desc") + top2 = Top(limit=5, 
order_by=None) + merged = top1.merge(top2) + assert merged.order_by == ["score desc"] + assert merged.limit == 5 + assert merged.offset == 0 + + def test_merge_limits_take_min(self): + """Merged limit is minimum of both.""" + top1 = Top(limit=10, order_by="id") + top2 = Top(limit=3, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 3 + + # Reverse order + top1 = Top(limit=3, order_by="id") + top2 = Top(limit=10, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 3 + + def test_merge_none_limit_preserved(self): + """None limit (unlimited) is handled correctly.""" + top1 = Top(limit=None, order_by="id") + top2 = Top(limit=5, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 5 + + top1 = Top(limit=5, order_by="id") + top2 = Top(limit=None, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 5 + + top1 = Top(limit=None, order_by="id") + top2 = Top(limit=None, order_by=None) + merged = top1.merge(top2) + assert merged.limit is None + + def test_merge_offsets_add(self): + """Offsets are added together.""" + top1 = Top(limit=10, order_by="id", offset=5) + top2 = Top(limit=3, order_by=None, offset=2) + merged = top1.merge(top2) + assert merged.offset == 7 + + def test_merge_preserves_existing_order(self): + """Merged Top preserves first Top's ordering.""" + top1 = Top(limit=10, order_by=["col1 desc", "col2 asc"]) + top2 = Top(limit=5, order_by=None) + merged = top1.merge(top2) + assert merged.order_by == ["col1 desc", "col2 asc"] + + +class TestTopValidation: + """Tests for Top validation.""" + + def test_order_by_none_allowed(self): + """order_by=None is valid (means inherit).""" + top = Top(limit=5, order_by=None) + assert top.order_by is None + + def test_order_by_string_converted_to_list(self): + """Single string order_by is converted to list.""" + top = Top(order_by="id desc") + assert top.order_by == ["id desc"] + + def test_order_by_list_preserved(self): + """List order_by is preserved.""" + top = Top(order_by=["col1", "col2 desc"]) + assert top.order_by == ["col1", "col2 desc"] + + def test_invalid_limit_type_raises(self): + """Non-integer limit raises TypeError.""" + with pytest.raises(TypeError): + Top(limit="5") + + def test_invalid_order_by_type_raises(self): + """Non-string order_by raises TypeError.""" + with pytest.raises(TypeError): + Top(order_by=123) + + def test_invalid_offset_type_raises(self): + """Non-integer offset raises TypeError.""" + with pytest.raises(TypeError): + Top(offset="1") From c2a2eaec808f0194ba14d429c402c1ca94c9cfe9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 00:56:44 -0600 Subject: [PATCH 03/17] refactor: improve API consistency for jobs and schema.drop AutoPopulate.jobs: - Changed from @property to descriptor pattern - Now accessible on both class and instance: Analysis.jobs.refresh() # class access Analysis().jobs.refresh() # instance access - Maintains lazy initialization behavior Schema.drop(): - Changed parameter from `force` to `prompt` for clarity - prompt=True: show confirmation prompt - prompt=False: drop without confirmation - prompt=None (default): use dj.config['safemode'] setting - More intuitive: prompt=False means "don't prompt" vs force=True which meant "force without prompt" Co-Authored-By: Claude Opus 4.5 --- src/datajoint/autopopulate.py | 49 ++++++++++++++++++++++------------- src/datajoint/schemas.py | 16 ++++++------ 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py 
index 6c4539760..4433f9058 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -93,27 +93,40 @@ class AutoPopulate: _allow_insert = False _jobs = None - @property - def jobs(self) -> Job: - """ - Access the job table for this auto-populated table. + class _JobsDescriptor: + """Descriptor allowing jobs access on both class and instance.""" - The job table (``~~table_name``) is created lazily on first access. - It tracks job status, priority, scheduling, and error information - for distributed populate operations. + def __get__(self, obj, objtype=None): + """ + Access the job table for this auto-populated table. - Returns - ------- - Job - Job management object for this table. - """ - if self._jobs is None: - from .jobs import Job + The job table (``~~table_name``) is created lazily on first access. + It tracks job status, priority, scheduling, and error information + for distributed populate operations. + + Can be accessed on either the class or an instance:: - self._jobs = Job(self) - if not self._jobs.is_declared: - self._jobs.declare() - return self._jobs + # Both work equivalently + Analysis.jobs.refresh() + Analysis().jobs.refresh() + + Returns + ------- + Job + Job management object for this table. + """ + if obj is None: + # Accessed on class - instantiate first + obj = objtype() + if obj._jobs is None: + from .jobs import Job + + obj._jobs = Job(obj) + if not obj._jobs.is_declared: + obj._jobs.declare() + return obj._jobs + + jobs: Job = _JobsDescriptor() def _declare_check(self, primary_key: list[str], fk_attribute_map: dict[str, tuple[str, str]]) -> None: """ diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 1f038321c..8f7acd19b 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -390,27 +390,27 @@ def spawn_missing_classes(self, context: dict[str, Any] | None = None) -> None: self._decorate_table(part_class, context=context, assert_declared=True) setattr(master_class, class_name, part_class) - def drop(self, force: bool = False) -> None: + def drop(self, prompt: bool | None = None) -> None: """ Drop the associated schema and all its tables. Parameters ---------- - force : bool, optional - If True, skip confirmation prompt. Default False. + prompt : bool, optional + If True, show confirmation prompt before dropping. + If False, drop without confirmation. + If None (default), use ``dj.config['safemode']`` setting. Raises ------ AccessError If insufficient permissions to drop the schema. """ + prompt = config["safemode"] if prompt is None else prompt + if not self.exists: logger.info("Schema named `{database}` does not exist. Doing nothing.".format(database=self.database)) - elif ( - not config["safemode"] - or force - or user_choice("Proceed to delete entire schema `%s`?" % self.database, default="no") == "yes" - ): + elif not prompt or user_choice("Proceed to delete entire schema `%s`?" 
% self.database, default="no") == "yes": logger.debug("Dropping `{database}`.".format(database=self.database)) try: self.connection.query("DROP DATABASE `{database}`".format(database=self.database)) From cabdb744cd547fce3b3f62dd33ad1dd43c25c575 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 01:34:48 -0600 Subject: [PATCH 04/17] refactor(delete): replace force_parts/force_masters with part_integrity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduced unified `part_integrity` parameter for delete and drop operations with policies for master-part integrity: For delete(): - "enforce" (default): Error if parts would be deleted without masters - "ignore": Allow deleting parts without masters (breaks integrity) - "cascade": Also delete masters when parts are deleted For drop(): - "enforce" (default): Error - drop master instead - "ignore": Allow direct drop This replaces the previous boolean parameters: - force_parts=True → part_integrity="ignore" - force_masters=True → part_integrity="cascade" - Part.drop(force=True) → Part.drop(part_integrity="ignore") The same parameter works consistently on both Table and Part classes, providing a cleaner, more intuitive API. Co-Authored-By: Claude Opus 4.5 --- RELEASE_MEMO.md | 117 +++++++++++++++++++++ docs/src/archive/manipulation/delete.md | 6 +- src/datajoint/table.py | 24 +++-- src/datajoint/user_tables.py | 49 ++++++--- tests/integration/test_cascading_delete.py | 8 +- tests/schema_simple.py | 4 +- 6 files changed, 175 insertions(+), 33 deletions(-) create mode 100644 RELEASE_MEMO.md diff --git a/RELEASE_MEMO.md b/RELEASE_MEMO.md new file mode 100644 index 000000000..25fdc6ca0 --- /dev/null +++ b/RELEASE_MEMO.md @@ -0,0 +1,117 @@ +# DataJoint 2.0 Release Memo + +## PyPI Release Process + +### Steps + +1. **Run "Manual Draft Release" workflow** on GitHub Actions +2. **Edit the draft release**: + - Change release name to `Release 2.0.0` + - Change tag to `v2.0.0` +3. **Publish the release** +4. Automation will: + - Update `version.py` to `2.0.0` + - Build and publish to PyPI + - Create PR to merge version update back to master + +### Version Note + +The release drafter computes version from the previous tag (`v0.14.6`), so it would generate `0.14.7` or `0.15.0`. You must **manually edit** the release name to include `2.0.0`. + +The regex on line 42 of `post_draft_release_published.yaml` extracts version from the release name: +```bash +VERSION=$(echo "${{ github.event.release.name }}" | grep -oP '\d+\.\d+\.\d+') +``` + +--- + +## Conda-Forge Release Process + +DataJoint has a [conda-forge feedstock](https://github.com/conda-forge/datajoint-feedstock). + +### How Conda-Forge Updates Work + +Conda-forge has **automated bots** that detect new PyPI releases and create PRs automatically: + +1. **You publish to PyPI** (via the GitHub release workflow) +2. **regro-cf-autotick-bot** detects the new version within ~24 hours +3. **Bot creates a PR** to the feedstock with updated version and hash +4. **Maintainers review and merge** (you're listed as a maintainer) +5. **Package builds automatically** for all platforms + +### Manual Update (if bot doesn't trigger) + +If the bot doesn't create a PR, manually update the feedstock: + +1. **Fork** [conda-forge/datajoint-feedstock](https://github.com/conda-forge/datajoint-feedstock) + +2. 
**Edit `recipe/meta.yaml`**: + ```yaml + {% set version = "2.0.0" %} + + package: + name: datajoint + version: {{ version }} + + source: + url: https://pypi.io/packages/source/d/datajoint/datajoint-{{ version }}.tar.gz + sha256: + + build: + number: 0 # Reset to 0 for new version + ``` + +3. **Get the SHA256 hash**: + ```bash + curl -sL https://pypi.org/pypi/datajoint/2.0.0/json | jq -r '.urls[] | select(.packagetype=="sdist") | .digests.sha256' + ``` + +4. **Update license** (important for 2.0!): + ```yaml + about: + license: Apache-2.0 # Changed from LGPL-2.1-only + license_file: LICENSE + ``` + +5. **Submit PR** to the feedstock + +### Action Items for 2.0 Release + +1. **First**: Publish to PyPI via GitHub release (name it "Release 2.0.0") +2. **Wait**: ~24 hours for conda-forge bot to detect +3. **Check**: [datajoint-feedstock PRs](https://github.com/conda-forge/datajoint-feedstock/pulls) for auto-PR +4. **Review**: Ensure license changed from LGPL to Apache-2.0 +5. **Merge**: As maintainer, approve and merge the PR + +### Timeline + +| Step | When | +|------|------| +| PyPI release | Day 0 | +| Bot detects & creates PR | Day 0-1 | +| Review & merge PR | Day 1-2 | +| Conda-forge package available | Day 1-2 | + +### Verification + +After release: +```bash +conda search datajoint -c conda-forge +# Should show 2.0.0 +``` + +--- + +## Maintainers + +- @datajointbot +- @dimitri-yatsenko +- @drewyangdev +- @guzman-raphael +- @ttngu207 + +## Links + +- [datajoint-feedstock on GitHub](https://github.com/conda-forge/datajoint-feedstock) +- [datajoint on Anaconda.org](https://anaconda.org/conda-forge/datajoint) +- [datajoint on PyPI](https://pypi.org/project/datajoint/) diff --git a/docs/src/archive/manipulation/delete.md b/docs/src/archive/manipulation/delete.md index 4e34c69ce..e61e8a2b8 100644 --- a/docs/src/archive/manipulation/delete.md +++ b/docs/src/archive/manipulation/delete.md @@ -26,6 +26,6 @@ Entities in a [part table](../design/tables/master-part.md) are usually removed consequence of deleting the master table. To enforce this workflow, calling `delete` directly on a part table produces an error. -In some cases, it may be necessary to override this behavior. -To remove entities from a part table without calling `delete` master, use the argument `force_parts=True`. -To include the corresponding entries in the master table, use the argument `force_masters=True`. +In some cases, it may be necessary to override this behavior using the `part_integrity` parameter: +- `part_integrity="ignore"`: Remove entities from a part table without deleting from master (breaks integrity). +- `part_integrity="cascade"`: Delete from parts and also cascade up to delete the corresponding master entries. diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 17becc49c..7543c385e 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -813,8 +813,7 @@ def delete( self, transaction: bool = True, prompt: bool | None = None, - force_parts: bool = False, - force_masters: bool = False, + part_integrity: str = "enforce", ) -> int: """ Deletes the contents of the table and its dependent tables, recursively. @@ -825,9 +824,10 @@ def delete( nested within another transaction. prompt: If `True`, show what will be deleted and ask for confirmation. If `False`, delete without confirmation. Default is `dj.config['safemode']`. - force_parts: Delete from parts even when not deleting from their masters. - force_masters: If `True`, include part/master pairs in the cascade. - Default is `False`. 
+ part_integrity: Policy for master-part integrity. One of: + - ``"enforce"`` (default): Error if parts would be deleted without masters. + - ``"ignore"``: Allow deleting parts without masters (breaks integrity). + - ``"cascade"``: Also delete masters when parts are deleted (maintains integrity). Returns: Number of deleted rows (excluding those from dependent tables). @@ -835,8 +835,11 @@ def delete( Raises: DataJointError: Delete exceeds maximum number of delete attempts. DataJointError: When deleting within an existing transaction. - DataJointError: Deleting a part table before its master. + DataJointError: Deleting a part table before its master (when part_integrity="enforce"). + ValueError: Invalid part_integrity value. """ + if part_integrity not in ("enforce", "ignore", "cascade"): + raise ValueError(f"part_integrity must be 'enforce', 'ignore', or 'cascade', got {part_integrity!r}") deleted = set() visited_masters = set() @@ -892,7 +895,7 @@ def cascade(table): master_name = get_master(child.full_table_name) if ( - force_masters + part_integrity == "cascade" and master_name and master_name != table.full_table_name and master_name not in visited_masters @@ -941,15 +944,16 @@ def cascade(table): self.connection.cancel_transaction() raise - if not force_parts: - # Avoid deleting from child before master (See issue #151) + if part_integrity == "enforce": + # Avoid deleting from part before master (See issue #151) for part in deleted: master = get_master(part) if master and master not in deleted: if transaction: self.connection.cancel_transaction() raise DataJointError( - "Attempt to delete part table {part} before deleting from its master {master} first.".format( + "Attempt to delete part table {part} before deleting from its master {master} first. " + "Use part_integrity='ignore' to allow, or part_integrity='cascade' to also delete master.".format( part=part, master=master ) ) diff --git a/src/datajoint/user_tables.py b/src/datajoint/user_tables.py index dc860b52c..96e9839e9 100644 --- a/src/datajoint/user_tables.py +++ b/src/datajoint/user_tables.py @@ -213,32 +213,53 @@ class Part(UserTable, metaclass=PartMeta): + ")" ) - def delete(self, force=False, **kwargs): + def delete(self, part_integrity: str = "enforce", **kwargs): """ Delete from a Part table. Args: - force: If True, allow direct deletion from Part table. - If False (default), raise an error. + part_integrity: Policy for master-part integrity. One of: + - ``"enforce"`` (default): Error - delete from master instead. + - ``"ignore"``: Allow direct deletion (breaks master-part integrity). + - ``"cascade"``: Delete parts AND cascade up to delete master. **kwargs: Additional arguments passed to Table.delete() - (transaction, prompt, force_masters) + (transaction, prompt) Raises: - DataJointError: If force is False (direct Part deletes are prohibited) + DataJointError: If part_integrity="enforce" (direct Part deletes prohibited) """ - if force: - super().delete(force_parts=True, **kwargs) - else: - raise DataJointError("Cannot delete from a Part directly. Delete from master instead") - - def drop(self, force=False): + if part_integrity == "enforce": + raise DataJointError( + "Cannot delete from a Part directly. Delete from master instead, " + "or use part_integrity='ignore' to break integrity, " + "or part_integrity='cascade' to also delete master." + ) + super().delete(part_integrity=part_integrity, **kwargs) + + def drop(self, part_integrity: str = "enforce"): """ - unless force is True, prohibits direct deletes from parts. 
+ Drop a Part table. + + Args: + part_integrity: Policy for master-part integrity. One of: + - ``"enforce"`` (default): Error - drop master instead. + - ``"ignore"``: Allow direct drop (breaks master-part structure). + Note: ``"cascade"`` is not supported for drop (too destructive). + + Raises: + DataJointError: If part_integrity="enforce" (direct Part drops prohibited) """ - if force: + if part_integrity == "ignore": super().drop() + elif part_integrity == "enforce": + raise DataJointError( + "Cannot drop a Part directly. Drop master instead, " + "or use part_integrity='ignore' to force." + ) else: - raise DataJointError("Cannot drop a Part directly. Delete from master instead") + raise ValueError( + f"part_integrity for drop must be 'enforce' or 'ignore', got {part_integrity!r}" + ) def alter(self, prompt=True, context=None): # without context, use declaration context which maps master keyword to master table diff --git a/tests/integration/test_cascading_delete.py b/tests/integration/test_cascading_delete.py index 19d17dc08..28f175bea 100644 --- a/tests/integration/test_cascading_delete.py +++ b/tests/integration/test_cascading_delete.py @@ -38,7 +38,7 @@ def test_delete_tree(schema_simp_pop): def test_stepwise_delete(schema_simp_pop): assert not dj.config["safemode"], "safemode must be off for testing" assert L() and A() and B() and B.C(), "schema population failed" - B.C().delete(force=True) + B.C().delete(part_integrity="ignore") assert not B.C(), "failed to delete child tables" B().delete() assert not B(), "failed to delete from the parent table following child table deletion" @@ -113,19 +113,19 @@ def test_delete_parts_error(schema_simp_pop): """test issue #151""" with pytest.raises(dj.DataJointError): Profile().populate_random() - Website().delete(force_masters=False) + Website().delete(part_integrity="enforce") def test_delete_parts(schema_simp_pop): """test issue #151""" Profile().populate_random() - Website().delete(force_masters=True) + Website().delete(part_integrity="cascade") def test_delete_parts_complex(schema_simp_pop): """test issue #151 with complex master/part. 
PR #1158.""" prev_len = len(G()) - (A() & "id_a=1").delete(force_masters=True) + (A() & "id_a=1").delete(part_integrity="cascade") assert prev_len - len(G()) == 16, "Failed to delete parts" diff --git a/tests/schema_simple.py b/tests/schema_simple.py index cfa7fd9c4..f0e768d1f 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -141,9 +141,9 @@ class H(dj.Part): """ class M(dj.Part): - definition = """ # test force_masters revisit + definition = """ # test part_integrity cascade -> E - id_m :int + id_m : uint16 --- -> E.H """ From dacf4ac22dc449c28d0bc267cdb238c2a9579153 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 09:16:34 -0600 Subject: [PATCH 05/17] ci: add MySQL/MinIO services to GitHub Actions workflow - Use MySQL service container for integration tests - Start MinIO manually (requires server command) - Set environment variables for external container mode - Add separate unit-tests job for faster feedback - Test against Python 3.10-3.13 matrix Addresses #1272 Co-Authored-By: Claude Opus 4.5 --- .github/workflows/test.yaml | 102 ++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 17 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6267cd6f1..dae639d38 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,37 +1,105 @@ name: Test + on: push: branches: - - "**" # every branch - - "!gh-pages" # exclude gh-pages branch - - "!stage*" # exclude branches beginning with stage + - "**" + - "!gh-pages" + - "!stage*" paths: - - "src/datajoint" - - "tests" + - "src/datajoint/**" + - "tests/**" + - "pyproject.toml" + - ".github/workflows/test.yaml" pull_request: branches: - - "**" # every branch - - "!gh-pages" # exclude gh-pages branch - - "!stage*" # exclude branches beginning with stage + - "**" + - "!gh-pages" + - "!stage*" paths: - - "src/datajoint" - - "tests" + - "src/datajoint/**" + - "tests/**" + - "pyproject.toml" + - ".github/workflows/test.yaml" + jobs: test: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: py_ver: ["3.10", "3.11", "3.12", "3.13"] mysql_ver: ["8.0"] + + services: + mysql: + image: mysql:${{ matrix.mysql_ver }} + env: + MYSQL_ROOT_PASSWORD: password + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping -h localhost" + --health-interval=10s + --health-timeout=5s + --health-retries=5 + steps: - uses: actions/checkout@v4 - - name: Set up Python ${{matrix.py_ver}} + + - name: Start MinIO + run: | + docker run -d --name minio \ + -p 9000:9000 \ + -e MINIO_ROOT_USER=datajoint \ + -e MINIO_ROOT_PASSWORD=datajoint \ + minio/minio:latest server /data + # Wait for MinIO to be ready + for i in {1..30}; do + if curl -sf http://127.0.0.1:9000/minio/health/live; then + echo "MinIO is ready" + break + fi + echo "Waiting for MinIO... 
($i/30)" + sleep 2 + done + + - name: Set up Python ${{ matrix.py_ver }} uses: actions/setup-python@v5 with: - python-version: ${{matrix.py_ver}} - - name: Integration test + python-version: ${{ matrix.py_ver }} + + - name: Install dependencies + run: pip install -e ".[test]" + + - name: Run tests env: - MYSQL_VER: ${{matrix.mysql_ver}} - run: | - pip install -e ".[test]" - pytest --cov-report term-missing --cov=datajoint tests + DJ_USE_EXTERNAL_CONTAINERS: "1" + DJ_HOST: 127.0.0.1 + DJ_PORT: 3306 + DJ_USER: root + DJ_PASS: password + S3_ENDPOINT: 127.0.0.1:9000 + S3_ACCESS_KEY: datajoint + S3_SECRET_KEY: datajoint + run: pytest --cov-report term-missing --cov=datajoint tests -v + + # Unit tests run without containers (faster feedback) + unit-tests: + runs-on: ubuntu-latest + strategy: + matrix: + py_ver: ["3.11"] + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.py_ver }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.py_ver }} + + - name: Install dependencies + run: pip install -e ".[test]" + + - name: Run unit tests + run: pytest tests/unit -v From ae5fd68d9c3d892935ce9b52fa50e875fb09ed42 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 09:20:21 -0600 Subject: [PATCH 06/17] style: format user_tables.py Co-Authored-By: Claude Opus 4.5 --- src/datajoint/user_tables.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/datajoint/user_tables.py b/src/datajoint/user_tables.py index 96e9839e9..535276bbd 100644 --- a/src/datajoint/user_tables.py +++ b/src/datajoint/user_tables.py @@ -253,13 +253,10 @@ def drop(self, part_integrity: str = "enforce"): super().drop() elif part_integrity == "enforce": raise DataJointError( - "Cannot drop a Part directly. Drop master instead, " - "or use part_integrity='ignore' to force." + "Cannot drop a Part directly. Drop master instead, " "or use part_integrity='ignore' to force." ) else: - raise ValueError( - f"part_integrity for drop must be 'enforce' or 'ignore', got {part_integrity!r}" - ) + raise ValueError(f"part_integrity for drop must be 'enforce' or 'ignore', got {part_integrity!r}") def alter(self, prompt=True, context=None): # without context, use declaration context which maps master keyword to master table From fbc4cad7f06d030851a95986f9d69090c142665e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 09:26:57 -0600 Subject: [PATCH 07/17] ci: use docker-compose for test services Use existing docker-compose.yaml instead of duplicating service configuration in the workflow. 
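As an aside for local debugging (not part of this diff), the same compose services the
workflow now relies on can be brought up by hand, mirroring the workflow step below and
the existing README instructions:

    MYSQL_VER=8.0 docker compose up -d db minio --wait
    DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/
    docker compose down
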
Co-Authored-By: Claude Opus 4.5 --- .github/workflows/test.yaml | 39 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index dae639d38..a7825f1be 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -10,6 +10,7 @@ on: - "src/datajoint/**" - "tests/**" - "pyproject.toml" + - "docker-compose.yaml" - ".github/workflows/test.yaml" pull_request: branches: @@ -20,6 +21,7 @@ on: - "src/datajoint/**" - "tests/**" - "pyproject.toml" + - "docker-compose.yaml" - ".github/workflows/test.yaml" jobs: @@ -31,38 +33,13 @@ jobs: py_ver: ["3.10", "3.11", "3.12", "3.13"] mysql_ver: ["8.0"] - services: - mysql: - image: mysql:${{ matrix.mysql_ver }} - env: - MYSQL_ROOT_PASSWORD: password - ports: - - 3306:3306 - options: >- - --health-cmd="mysqladmin ping -h localhost" - --health-interval=10s - --health-timeout=5s - --health-retries=5 - steps: - uses: actions/checkout@v4 - - name: Start MinIO - run: | - docker run -d --name minio \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=datajoint \ - -e MINIO_ROOT_PASSWORD=datajoint \ - minio/minio:latest server /data - # Wait for MinIO to be ready - for i in {1..30}; do - if curl -sf http://127.0.0.1:9000/minio/health/live; then - echo "MinIO is ready" - break - fi - echo "Waiting for MinIO... ($i/30)" - sleep 2 - done + - name: Start services + run: docker compose up -d db minio --wait + env: + MYSQL_VER: ${{ matrix.mysql_ver }} - name: Set up Python ${{ matrix.py_ver }} uses: actions/setup-python@v5 @@ -84,6 +61,10 @@ jobs: S3_SECRET_KEY: datajoint run: pytest --cov-report term-missing --cov=datajoint tests -v + - name: Stop services + if: always() + run: docker compose down + # Unit tests run without containers (faster feedback) unit-tests: runs-on: ubuntu-latest From d778e4f594a57e569de7a74656b0eaf51945f772 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 09:32:48 -0600 Subject: [PATCH 08/17] ci: install graphviz for ERD tests Co-Authored-By: Claude Opus 4.5 --- .github/workflows/test.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a7825f1be..f9b25cbe9 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -36,6 +36,9 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y graphviz + - name: Start services run: docker compose up -d db minio --wait env: From 4f4a92483a22c2c4a8db11aaf7f183697d433fa0 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 09:52:29 -0600 Subject: [PATCH 09/17] fix(jobs): use MySQL server time consistently for all scheduling - Use MySQL NOW() instead of Python datetime.now() for consistent timing - Jobs refresh: use CURRENT_TIMESTAMP default for scheduled_time (delay=0) - Jobs reserve: use MySQL NOW() for scheduled_time comparison and reserved_time - Jobs complete/error: use MySQL NOW() for completed_time - Stale/orphan timeout checks: use MySQL NOW() - INTERVAL syntax This fixes timezone mismatches between Python and MySQL that caused jobs to not be found in CI environments. 
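Illustrative before/after of the pattern, simplified from the reserve() change below:

    # before: Python clock interpolated into SQL; breaks when the client and
    # server timezones (or clocks) disagree
    now = datetime.now()
    job = (self & key & 'status="pending"' & f'scheduled_time <= "{now}"').to_dicts()

    # after: the comparison is evaluated entirely on the MySQL server
    job = (self & key & 'status="pending"' & "scheduled_time <= NOW()").to_dicts()
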
Co-Authored-By: Claude Opus 4.5 --- src/datajoint/jobs.py | 49 ++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 57533b8f3..15add43e8 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -348,7 +348,7 @@ def refresh( 3. Remove stale jobs: jobs older than stale_timeout whose keys not in key_source 4. Remove orphaned jobs: reserved jobs older than orphan_timeout (if specified) """ - from datetime import datetime, timedelta + from datetime import timedelta from .settings import config @@ -363,7 +363,6 @@ def refresh( stale_timeout = config.jobs.stale_timeout result = {"added": 0, "removed": 0, "orphaned": 0, "re_pended": 0} - now = datetime.now() # 1. Add new jobs key_source = self._target.key_source @@ -378,14 +377,21 @@ def refresh( new_key_list = new_keys.keys() if new_key_list: - scheduled_time = now + timedelta(seconds=delay) if delay > 0 else now + # Get MySQL server time for delayed scheduling (delay > 0 only) + # When delay == 0, omit scheduled_time to use MySQL's CURRENT_TIMESTAMP default + scheduled_time = None + if delay > 0: + server_now = self.connection.query("SELECT NOW()").fetchone()[0] + scheduled_time = server_now + timedelta(seconds=delay) + for key in new_key_list: job_entry = { **key, "status": "pending", "priority": priority, - "scheduled_time": scheduled_time, } + if scheduled_time is not None: + job_entry["scheduled_time"] = scheduled_time try: self.insert1(job_entry, ignore_extra_fields=True) result["added"] += 1 @@ -403,10 +409,9 @@ def refresh( self.insert1({**key, "status": "pending", "priority": priority}) result["re_pended"] += 1 - # 3. Remove stale jobs (not ignore status) + # 3. Remove stale jobs (not ignore status) - use MySQL NOW() for consistent timing if stale_timeout > 0: - stale_cutoff = now - timedelta(seconds=stale_timeout) - old_jobs = self & f'created_time < "{stale_cutoff}"' & 'status != "ignore"' + old_jobs = self & f"created_time < NOW() - INTERVAL {stale_timeout} SECOND" & 'status != "ignore"' for key in old_jobs.keys(): # Check if key still in key_source @@ -414,10 +419,9 @@ def refresh( (self & key).delete_quick() result["removed"] += 1 - # 4. Handle orphaned reserved jobs + # 4. Handle orphaned reserved jobs - use MySQL NOW() for consistent timing if orphan_timeout is not None and orphan_timeout > 0: - orphan_cutoff = now - timedelta(seconds=orphan_timeout) - orphaned_jobs = self.reserved & f'reserved_time < "{orphan_cutoff}"' + orphaned_jobs = self.reserved & f"reserved_time < NOW() - INTERVAL {orphan_timeout} SECOND" for key in orphaned_jobs.keys(): (self & key).delete_quick() @@ -443,21 +447,21 @@ def reserve(self, key: dict) -> bool: bool True if reservation successful, False if job not available. 
""" - from datetime import datetime - - # Check if job is pending and scheduled - now = datetime.now() - job = (self & key & 'status="pending"' & f'scheduled_time <= "{now}"').to_dicts() + # Check if job is pending and scheduled (use MySQL NOW() for consistent timing) + job = (self & key & 'status="pending"' & "scheduled_time <= NOW()").to_dicts() if not job: return False + # Get MySQL server time for reserved_time + server_now = self.connection.query("SELECT NOW()").fetchone()[0] + # Build update row with primary key and new values pk = self._get_pk(key) update_row = { **pk, "status": "reserved", - "reserved_time": now, + "reserved_time": server_now, "host": platform.node(), "pid": os.getpid(), "connection_id": self.connection.connection_id, @@ -489,16 +493,16 @@ def complete(self, key: dict, duration: float | None = None) -> None: - If True: updates status to ``'success'`` with completion time and duration - If False: deletes the job entry """ - from datetime import datetime - from .settings import config if config.jobs.keep_completed: + # Use MySQL server time for completed_time + server_now = self.connection.query("SELECT NOW()").fetchone()[0] pk = self._get_pk(key) update_row = { **pk, "status": "success", - "completed_time": datetime.now(), + "completed_time": server_now, } if duration is not None: update_row["duration"] = duration @@ -519,16 +523,17 @@ def error(self, key: dict, error_message: str, error_stack: str | None = None) - error_stack : str, optional Full stack trace. """ - from datetime import datetime - if len(error_message) > ERROR_MESSAGE_LENGTH: error_message = error_message[: ERROR_MESSAGE_LENGTH - len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX + # Use MySQL server time for completed_time + server_now = self.connection.query("SELECT NOW()").fetchone()[0] + pk = self._get_pk(key) update_row = { **pk, "status": "error", - "completed_time": datetime.now(), + "completed_time": server_now, "error_message": error_message, } if error_stack is not None: From 344de9b48445dd9a291b7bf990c0bbfa704af608 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 09:59:52 -0600 Subject: [PATCH 10/17] fix(jobs): use NOW(3) to match CURRENT_TIMESTAMP(3) precision The scheduled_time column uses CURRENT_TIMESTAMP(3) with milliseconds, so comparisons must also use NOW(3) for consistent precision. Co-Authored-By: Claude Opus 4.5 --- src/datajoint/autopopulate.py | 4 ++-- src/datajoint/jobs.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 4433f9058..80c669079 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -487,8 +487,8 @@ def handler(signum, frame): if refresh: self.jobs.refresh(*restrictions, priority=priority) - # Fetch pending jobs ordered by priority - pending_query = self.jobs.pending & "scheduled_time <= NOW()" + # Fetch pending jobs ordered by priority (use NOW(3) to match CURRENT_TIMESTAMP(3) precision) + pending_query = self.jobs.pending & "scheduled_time <= NOW(3)" if priority is not None: pending_query = pending_query & f"priority <= {priority}" diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 15add43e8..d453e825e 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -447,8 +447,8 @@ def reserve(self, key: dict) -> bool: bool True if reservation successful, False if job not available. 
""" - # Check if job is pending and scheduled (use MySQL NOW() for consistent timing) - job = (self & key & 'status="pending"' & "scheduled_time <= NOW()").to_dicts() + # Check if job is pending and scheduled (use NOW(3) to match CURRENT_TIMESTAMP(3) precision) + job = (self & key & 'status="pending"' & "scheduled_time <= NOW(3)").to_dicts() if not job: return False From 21004876824f02719a90283cfba65ebd16961326 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 10:08:43 -0600 Subject: [PATCH 11/17] refactor(jobs): always use NOW(3) + INTERVAL for scheduled_time Simplify by always computing scheduled_time with MySQL server time, removing the special case for delay=0. Co-Authored-By: Claude Opus 4.5 --- src/datajoint/jobs.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index d453e825e..7be80a0e5 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -348,8 +348,6 @@ def refresh( 3. Remove stale jobs: jobs older than stale_timeout whose keys not in key_source 4. Remove orphaned jobs: reserved jobs older than orphan_timeout (if specified) """ - from datetime import timedelta - from .settings import config # Ensure jobs table exists @@ -377,21 +375,16 @@ def refresh( new_key_list = new_keys.keys() if new_key_list: - # Get MySQL server time for delayed scheduling (delay > 0 only) - # When delay == 0, omit scheduled_time to use MySQL's CURRENT_TIMESTAMP default - scheduled_time = None - if delay > 0: - server_now = self.connection.query("SELECT NOW()").fetchone()[0] - scheduled_time = server_now + timedelta(seconds=delay) + # Always use MySQL server time for scheduling (NOW(3) matches datetime(3) precision) + scheduled_time = self.connection.query(f"SELECT NOW(3) + INTERVAL {delay} SECOND").fetchone()[0] for key in new_key_list: job_entry = { **key, "status": "pending", "priority": priority, + "scheduled_time": scheduled_time, } - if scheduled_time is not None: - job_entry["scheduled_time"] = scheduled_time try: self.insert1(job_entry, ignore_extra_fields=True) result["added"] += 1 From 1fdfb3ea2f2ad9c7a5cff7a345262774c2877320 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 10:18:40 -0600 Subject: [PATCH 12/17] ci: use pixi for CI workflow - Use setup-pixi action instead of manual setup - Graphviz installed automatically via pixi conda dependency - Testcontainers manages MySQL/MinIO containers automatically - No manual pip install needed, pixi handles dependencies Addresses reviewer feedback on PR #1312. 
Co-Authored-By: Claude Opus 4.5 --- .github/workflows/test.yaml | 56 +++++++------------------------------ 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f9b25cbe9..34bf40d7a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -10,7 +10,7 @@ on: - "src/datajoint/**" - "tests/**" - "pyproject.toml" - - "docker-compose.yaml" + - "pixi.lock" - ".github/workflows/test.yaml" pull_request: branches: @@ -21,69 +21,33 @@ on: - "src/datajoint/**" - "tests/**" - "pyproject.toml" - - "docker-compose.yaml" + - "pixi.lock" - ".github/workflows/test.yaml" jobs: test: runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - py_ver: ["3.10", "3.11", "3.12", "3.13"] - mysql_ver: ["8.0"] - steps: - uses: actions/checkout@v4 - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y graphviz - - - name: Start services - run: docker compose up -d db minio --wait - env: - MYSQL_VER: ${{ matrix.mysql_ver }} - - - name: Set up Python ${{ matrix.py_ver }} - uses: actions/setup-python@v5 + - name: Set up pixi + uses: prefix-dev/setup-pixi@v0.9.3 with: - python-version: ${{ matrix.py_ver }} - - - name: Install dependencies - run: pip install -e ".[test]" + cache: true - name: Run tests - env: - DJ_USE_EXTERNAL_CONTAINERS: "1" - DJ_HOST: 127.0.0.1 - DJ_PORT: 3306 - DJ_USER: root - DJ_PASS: password - S3_ENDPOINT: 127.0.0.1:9000 - S3_ACCESS_KEY: datajoint - S3_SECRET_KEY: datajoint - run: pytest --cov-report term-missing --cov=datajoint tests -v - - - name: Stop services - if: always() - run: docker compose down + run: pixi run -e test test-cov # Unit tests run without containers (faster feedback) unit-tests: runs-on: ubuntu-latest - strategy: - matrix: - py_ver: ["3.11"] steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.py_ver }} - uses: actions/setup-python@v5 + - name: Set up pixi + uses: prefix-dev/setup-pixi@v0.9.3 with: - python-version: ${{ matrix.py_ver }} - - - name: Install dependencies - run: pip install -e ".[test]" + cache: true - name: Run unit tests - run: pytest tests/unit -v + run: pixi run -e test pytest tests/unit -v From 307983aed9ae2f5f25b1707108a5f1f7244bf7f9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 10:24:39 -0600 Subject: [PATCH 13/17] docs: update developer guide to use pixi as primary toolchain - Make pixi the recommended development setup (matches CI) - Add DOCKER_HOST note for macOS Docker Desktop users - Keep pip as alternative for users who prefer it - Update pre-commit commands to use pixi Co-Authored-By: Claude Opus 4.5 --- README.md | 73 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 85c3269e7..40a1a2c7c 100644 --- a/README.md +++ b/README.md @@ -100,23 +100,25 @@ Scientific data includes both structured metadata and large data objects (time s ### Prerequisites - [Docker](https://docs.docker.com/get-docker/) (Docker daemon must be running) -- Python 3.10+ +- [pixi](https://pixi.sh) (recommended) or Python 3.10+ -### Quick Start +### Quick Start with pixi (Recommended) + +[pixi](https://pixi.sh) manages all dependencies including Python, graphviz, and test tools: ```bash -# Clone and install +# Clone the repo git clone https://github.com/datajoint/datajoint-python.git cd datajoint-python -pip install -e ".[test]" -# Run all tests (containers start automatically via testcontainers) -pytest tests/ 
+# Install dependencies and run tests (containers managed by testcontainers) +pixi run test -# Install and run pre-commit hooks -pip install pre-commit -pre-commit install -pre-commit run --all-files +# Run with coverage +pixi run test-cov + +# Run pre-commit hooks +pixi run pre-commit run --all-files ``` ### Running Tests @@ -126,16 +128,30 @@ Tests use [testcontainers](https://testcontainers.com/) to automatically manage ```bash # Run all tests (recommended) -pytest tests/ +pixi run test # Run with coverage report -pytest --cov-report term-missing --cov=datajoint tests/ +pixi run test-cov + +# Run only unit tests (no containers needed) +pixi run -e test pytest tests/unit/ # Run specific test file -pytest tests/integration/test_blob.py -v +pixi run -e test pytest tests/integration/test_blob.py -v +``` -# Run only unit tests (no containers needed) -pytest tests/unit/ +**macOS Docker Desktop users:** If tests fail to connect to Docker, set `DOCKER_HOST`: +```bash +export DOCKER_HOST=unix://$HOME/.docker/run/docker.sock +``` + +### Alternative: Using pip + +If you prefer pip over pixi: + +```bash +pip install -e ".[test]" +pytest tests/ ``` ### Alternative: External Containers @@ -147,7 +163,8 @@ For development/debugging, you may prefer persistent containers that survive tes docker compose up -d db minio # Run tests using external containers -DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/ +DJ_USE_EXTERNAL_CONTAINERS=1 pixi run test +# Or with pip: DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/ # Stop containers when done docker compose down @@ -161,15 +178,6 @@ Run tests entirely in Docker (no local Python needed): docker compose --profile test up djtest --build ``` -### Alternative: Using pixi - -[pixi](https://pixi.sh) users can run tests with: - -```bash -pixi install # First time setup -pixi run test # Runs tests (testcontainers manages containers) -``` - ### Pre-commit Hooks Pre-commit hooks run automatically on `git commit` to check code quality. @@ -177,15 +185,14 @@ Pre-commit hooks run automatically on `git commit` to check code quality. ```bash # Install hooks (first time only) -pip install pre-commit -pre-commit install +pixi run pre-commit install +# Or with pip: pip install pre-commit && pre-commit install # Run all checks manually -pre-commit run --all-files +pixi run pre-commit run --all-files # Run specific hook -pre-commit run ruff --all-files -pre-commit run codespell --all-files +pixi run pre-commit run ruff --all-files ``` Hooks include: @@ -196,9 +203,9 @@ Hooks include: ### Before Submitting a PR -1. **Run all tests**: `pytest tests/` -2. **Run pre-commit**: `pre-commit run --all-files` -3. **Check coverage**: `pytest --cov-report term-missing --cov=datajoint tests/` +1. **Run all tests**: `pixi run test` +2. **Run pre-commit**: `pixi run pre-commit run --all-files` +3. **Check coverage**: `pixi run test-cov` ### Environment Variables From 272fcb5be91fd77e994a9f5d45a54b5ecef7aeed Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 10:27:57 -0600 Subject: [PATCH 14/17] ci: disable locked mode for pixi install Allow lock file updates in CI since the lock file may be stale. 
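When the lock file does drift, a suggested (not enforced) local workflow is to refresh
and commit it:

    pixi install        # re-solves and updates pixi.lock when pyproject.toml changed
    git add pixi.lock
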
Co-Authored-By: Claude Opus 4.5 --- .github/workflows/test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 34bf40d7a..a4a91448f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -34,6 +34,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.3 with: cache: true + locked: false - name: Run tests run: pixi run -e test test-cov @@ -48,6 +49,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.3 with: cache: true + locked: false - name: Run unit tests run: pixi run -e test pytest tests/unit -v From b8645f826127421e4ac71000407b4544c41da8ba Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 10:33:08 -0600 Subject: [PATCH 15/17] fix(pixi): add test extras to feature-specific pypi-dependencies The test feature was not including s3fs and other test dependencies because the feature-specific pypi-dependencies were missing. Co-Authored-By: Claude Opus 4.5 --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f3eee2313..de29b88a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,6 +180,12 @@ platforms = ["linux-64", "osx-arm64", "linux-aarch64"] [tool.pixi.pypi-dependencies] datajoint = { path = ".", editable = true } +[tool.pixi.feature.test.pypi-dependencies] +datajoint = { path = ".", editable = true, extras = ["test"] } + +[tool.pixi.feature.dev.pypi-dependencies] +datajoint = { path = ".", editable = true, extras = ["dev", "test"] } + [tool.pixi.environments] default = { solve-group = "default" } dev = { features = ["dev"], solve-group = "default" } From 27391c764d80d0e945b0c76a6205ae015d598037 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 11:53:12 -0600 Subject: [PATCH 16/17] feat: add mypy type checking to pre-commit - Add mypy configuration to pyproject.toml - Add mypy hook to pre-commit with type stubs - Start with lenient settings, strict checking for content_registry - All other modules excluded until fully typed (gradual adoption) Addresses #1266. 
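Graduating a module later is a two-step edit in pyproject.toml (sketch only;
datajoint.hash is just an example name): remove it from the ignore_errors override and
add it to the strict override introduced below:

    [[tool.mypy.overrides]]
    module = [
        "datajoint.content_registry",
        "datajoint.hash",  # example of a newly typed module
    ]
    disallow_untyped_defs = true
    disallow_incomplete_defs = true
    warn_return_any = true
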
Co-Authored-By: Claude Opus 4.5 --- .pre-commit-config.yaml | 15 +++++++++--- pyproject.toml | 54 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c38487d2..3ea96126f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,6 +36,15 @@ repos: hooks: # lint github actions workflow yaml - id: actionlint - -## Suggest to add pytest hook that runs unit test | Prerequisite: split unit/integration test -## https://github.com/datajoint/datajoint-python/issues/1211 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.14.1 + hooks: + - id: mypy + files: ^src/datajoint/ + additional_dependencies: + - pydantic + - pydantic-settings + - types-PyMySQL + - types-tqdm + - pandas-stubs + - numpy diff --git a/pyproject.toml b/pyproject.toml index de29b88a9..ac3af18df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,6 +152,60 @@ quote-style = "double" indent-style = "space" line-ending = "auto" +[tool.mypy] +python_version = "3.10" +ignore_missing_imports = true +# Start with lenient settings, gradually enable stricter checks +warn_return_any = false +warn_unused_ignores = false +disallow_untyped_defs = false +disallow_incomplete_defs = false +check_untyped_defs = true + +# Modules with complete type coverage - strict checking enabled +[[tool.mypy.overrides]] +module = [ + "datajoint.content_registry", +] +disallow_untyped_defs = true +disallow_incomplete_defs = true +warn_return_any = true + +# Modules excluded from type checking until fully typed +[[tool.mypy.overrides]] +module = [ + "datajoint.admin", + "datajoint.autopopulate", + "datajoint.blob", + "datajoint.builtin_codecs", + "datajoint.cli", + "datajoint.codecs", + "datajoint.condition", + "datajoint.connection", + "datajoint.declare", + "datajoint.dependencies", + "datajoint.diagram", + "datajoint.errors", + "datajoint.expression", + "datajoint.gc", + "datajoint.hash", + "datajoint.heading", + "datajoint.jobs", + "datajoint.lineage", + "datajoint.logging", + "datajoint.migrate", + "datajoint.objectref", + "datajoint.preview", + "datajoint.schemas", + "datajoint.settings", + "datajoint.staged_insert", + "datajoint.storage", + "datajoint.table", + "datajoint.user_tables", + "datajoint.utils", +] +ignore_errors = true + [tool.setuptools] packages = ["datajoint"] package-dir = {"" = "src"} From f195110a7f64b3dedcd0d7b426ede65ff98026e9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 12:00:35 -0600 Subject: [PATCH 17/17] feat: add unit tests to pre-commit hooks Run unit tests locally before commit to catch issues before CI. Addresses feedback from @drewyangdev on #1211. Co-Authored-By: Claude Opus 4.5 --- .pre-commit-config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ea96126f..f2bd59004 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,3 +48,12 @@ repos: - types-tqdm - pandas-stubs - numpy +- repo: local + hooks: + - id: unit-tests + name: unit tests + entry: pytest tests/unit/ -v --tb=short + language: system + pass_filenames: false + always_run: true + stages: [pre-commit]
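
Usage note (assumes pre-commit is installed): the new hook can be exercised on demand
by its id, and because it sets always_run with pass_filenames disabled it runs the full
tests/unit suite regardless of which files are staged:

    pre-commit run unit-tests --all-files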