diff --git a/changes/3702.bugfix.md b/changes/3702.bugfix.md
new file mode 100644
index 0000000000..94a2902567
--- /dev/null
+++ b/changes/3702.bugfix.md
@@ -0,0 +1 @@
+Skip chunk coordinate enumeration in resize when the array is only growing, avoiding unbounded memory usage for large arrays.
\ No newline at end of file
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 7abb0075dc..564d0e915a 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -5990,7 +5990,10 @@ async def _resize(
     assert len(new_shape) == len(array.metadata.shape)
     new_metadata = array.metadata.update_shape(new_shape)
 
-    if delete_outside_chunks:
+    # Only run deletion when a dimension shrinks: enumerating chunk coords is unbounded in memory.
+    only_growing = all(new >= old for new, old in zip(new_shape, array.metadata.shape, strict=True))
+
+    if delete_outside_chunks and not only_growing:
         # Remove all chunks outside of the new shape
         old_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(array.metadata.shape))
         new_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(new_shape))
diff --git a/tests/test_array.py b/tests/test_array.py
index b7d7bc723d..01a82e1938 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -781,6 +781,73 @@ def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None:
     assert new_shape == result.shape
 
 
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_resize_growing_skips_chunk_enumeration(
+    store: MemoryStore, zarr_format: ZarrFormat
+) -> None:
+    """Growing an array should not enumerate chunk coords for deletion (#3650 mitigation)."""
+    from zarr.core.chunk_grids import RegularChunkGrid
+
+    z = zarr.create(
+        shape=(10, 10),
+        chunks=(5, 5),
+        dtype="i4",
+        fill_value=0,
+        store=store,
+        zarr_format=zarr_format,
+    )
+    z[:] = np.ones((10, 10), dtype="i4")
+
+    # growth only - ensure no chunk coords are enumerated
+    with mock.patch.object(
+        RegularChunkGrid,
+        "all_chunk_coords",
+        wraps=z.metadata.chunk_grid.all_chunk_coords,
+    ) as mock_coords:
+        z.resize((20, 20))
+        mock_coords.assert_not_called()
+
+    assert z.shape == (20, 20)
+    np.testing.assert_array_equal(np.ones((10, 10), dtype="i4"), z[:10, :10])
+    np.testing.assert_array_equal(np.zeros((10, 10), dtype="i4"), z[10:, 10:])
+
+    # shrink - ensure no regression of behaviour
+    with mock.patch.object(
+        RegularChunkGrid,
+        "all_chunk_coords",
+        wraps=z.metadata.chunk_grid.all_chunk_coords,
+    ) as mock_coords:
+        z.resize((5, 5))
+        assert mock_coords.call_count > 0
+
+    assert z.shape == (5, 5)
+    np.testing.assert_array_equal(np.ones((5, 5), dtype="i4"), z[:])
+
+    # mixed: grow dim 0, shrink dim 1 - ensure deletion path runs
+    z2 = zarr.create(
+        shape=(10, 10),
+        chunks=(5, 5),
+        dtype="i4",
+        fill_value=0,
+        store=store,
+        zarr_format=zarr_format,
+        overwrite=True,
+    )
+    z2[:] = np.ones((10, 10), dtype="i4")
+
+    with mock.patch.object(
+        RegularChunkGrid,
+        "all_chunk_coords",
+        wraps=z2.metadata.chunk_grid.all_chunk_coords,
+    ) as mock_coords:
+        z2.resize((20, 5))
+        assert mock_coords.call_count > 0
+
+    assert z2.shape == (20, 5)
+    np.testing.assert_array_equal(np.ones((10, 5), dtype="i4"), z2[:10, :])
+    np.testing.assert_array_equal(np.zeros((10, 5), dtype="i4"), z2[10:, :])
+
+
 @pytest.mark.parametrize("store", ["memory"], indirect=True)
 def test_append_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None:
     a = np.arange(105)