Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3702.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Skip chunk coordinate enumeration in resize when the array is only growing, avoiding unbounded memory usage for large arrays.
5 changes: 4 additions & 1 deletion src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5990,7 +5990,10 @@ async def _resize(
assert len(new_shape) == len(array.metadata.shape)
new_metadata = array.metadata.update_shape(new_shape)

if delete_outside_chunks:
# ensure deletion is only run if array is shrinking as the delete_outside_chunks path is unbounded in memory
only_growing = all(new >= old for new, old in zip(new_shape, array.metadata.shape, strict=True))

if delete_outside_chunks and not only_growing:
# Remove all chunks outside of the new shape
old_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(array.metadata.shape))
new_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(new_shape))
Expand Down
67 changes: 67 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,73 @@ def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None:
assert new_shape == result.shape


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_resize_growing_skips_chunk_enumeration(
    store: MemoryStore, zarr_format: ZarrFormat
) -> None:
    """Resize should only enumerate chunk coords when a dimension shrinks (#3650 mitigation).

    Three scenarios are exercised against a 10x10 array of ones with 5x5 chunks:

    * pure growth: ``all_chunk_coords`` must never be called;
    * pure shrink: ``all_chunk_coords`` must still be called;
    * mixed (one dimension grows, the other shrinks): ``all_chunk_coords``
      must still be called.
    """
    from zarr.core.chunk_grids import RegularChunkGrid

    # Baseline data written into every freshly created array.
    baseline = np.ones((10, 10), dtype="i4")
    blank = np.zeros_like(baseline)

    arr = zarr.create(
        shape=(10, 10),
        chunks=(5, 5),
        dtype="i4",
        fill_value=0,
        store=store,
        zarr_format=zarr_format,
    )
    arr[:] = baseline

    # Scenario 1: growth only, so the enumeration must be bypassed entirely.
    # The class attribute is replaced by a mock that forwards to the real
    # bound method, so any call that does happen still behaves normally.
    real_enumerate = arr.metadata.chunk_grid.all_chunk_coords
    with mock.patch.object(RegularChunkGrid, "all_chunk_coords", wraps=real_enumerate) as spy:
        arr.resize((20, 20))
        spy.assert_not_called()

    assert arr.shape == (20, 20)
    np.testing.assert_array_equal(baseline, arr[:10, :10])
    np.testing.assert_array_equal(blank, arr[10:, 10:])

    # Scenario 2: shrinking must keep going through the enumeration.
    real_enumerate = arr.metadata.chunk_grid.all_chunk_coords
    with mock.patch.object(RegularChunkGrid, "all_chunk_coords", wraps=real_enumerate) as spy:
        arr.resize((5, 5))
        assert spy.called

    assert arr.shape == (5, 5)
    np.testing.assert_array_equal(baseline[:5, :5], arr[:])

    # Scenario 3: dim 0 grows while dim 1 shrinks; the enumeration must still run.
    mixed = zarr.create(
        shape=(10, 10),
        chunks=(5, 5),
        dtype="i4",
        fill_value=0,
        store=store,
        zarr_format=zarr_format,
        overwrite=True,
    )
    mixed[:] = baseline

    real_enumerate = mixed.metadata.chunk_grid.all_chunk_coords
    with mock.patch.object(RegularChunkGrid, "all_chunk_coords", wraps=real_enumerate) as spy:
        mixed.resize((20, 5))
        assert spy.called

    assert mixed.shape == (20, 5)
    np.testing.assert_array_equal(baseline[:, :5], mixed[:10, :])
    np.testing.assert_array_equal(blank[:, :5], mixed[10:, :])


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_append_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None:
a = np.arange(105)
Expand Down