From 672d23bc9c8dff0c914fb4030a9adf5722cb4bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Tue, 18 Nov 2025 14:06:44 +0100 Subject: [PATCH 1/9] Iterative search method for memory estimator with test --- httomo/method_wrappers/generic.py | 40 +++++++++++ httomo/runner/methods_repository_interface.py | 6 ++ tests/method_wrappers/test_generic.py | 69 ++++++++++++++++++- 3 files changed, 114 insertions(+), 1 deletion(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 1aa59deaf..90e882b98 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -447,6 +447,8 @@ def calculate_max_slices( * np.prod(non_slice_dims_shape) * data_dtype.itemsize ) + elif self.memory_gpu.method == "iterative": + return self._calculate_max_slices_iterative(data_dtype, non_slice_dims_shape, available_memory), available_memory else: ( memory_bytes_method, @@ -462,6 +464,44 @@ def calculate_max_slices( available_memory - subtract_bytes ) // memory_bytes_method, available_memory + def _calculate_max_slices_iterative(self, data_dtype: np.dtype, non_slice_dims_shape: Tuple[int, int], available_memory: int) -> int: + def get_mem_bytes(current_slices): + memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]), + dtype=data_dtype, + **self._unwrap_output_ref_values()) + return memory_bytes + + # Find a number of slices that does not fit + current_slices = 100 + slices_high = None + memory_bytes = get_mem_bytes(current_slices) + if memory_bytes > available_memory: + slices_high = current_slices + else: + # linear approximation + current_slices = (available_memory + memory_bytes - 1) // memory_bytes + while True: + memory_bytes = get_mem_bytes(current_slices) + if memory_bytes > available_memory: + break + # If linear approximation is not enough, just double every iteration + current_slices *= 2 + slices_high = 
current_slices + + # Binary search between low and high + slices_low = 0 + while slices_high - slices_low > 1: + current_slices = (slices_low + slices_high) // 2 + memory_bytes = get_mem_bytes(current_slices) + if memory_bytes > available_memory: + slices_high = current_slices + elif memory_bytes < available_memory: + slices_low = current_slices + else: # memory_bytes == available_memory + return current_slices + + return slices_low + def _unwrap_output_ref_values(self) -> Dict[str, Any]: """ Iterate through params in `self.config_params` and, for any value of type `OutputRef`, diff --git a/httomo/runner/methods_repository_interface.py b/httomo/runner/methods_repository_interface.py index 88be412b8..2fe70469d 100644 --- a/httomo/runner/methods_repository_interface.py +++ b/httomo/runner/methods_repository_interface.py @@ -50,6 +50,12 @@ def calculate_memory_bytes( """Calculate the memory required in bytes, returning bytes method and subtract bytes tuple""" ... # pragma: no cover + def calculate_memory_bytes_for_slices( + self, dims_shape: Tuple[int, int, int], dtype: np.dtype, **kwargs + ) -> Tuple[int, int]: + """Calculate the memory required in bytes, returning bytes method and subtract bytes tuple""" + ... 
# pragma: no cover + def calculate_output_dims( self, non_slice_dims_shape: Tuple[int, int], **kwargs ) -> Tuple[int, int]: diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index e62fe0425..ef8876db2 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import Callable, List, Optional, Union import numpy as np from httomo.method_wrappers import make_method_wrapper @@ -688,6 +688,73 @@ def test_method(data): assert max_slices > dummy_block.chunk_shape[0] assert available_memory == 1_000_000_000 +def _linear_mem(*args, **kwargs): + proj, x, y = kwargs['dims_shape'] + dtype = kwargs['dtype'] + return proj * x * y * dtype.itemsize, 0 + +def _linear_offset_mem(*args, **kwargs): + proj, x, y = kwargs['dims_shape'] + dtype = kwargs['dtype'] + return (x * y + proj * x * y + proj * x ** 2) * dtype.itemsize, 0 + +def _quadratic_mem(*args, **kwargs): + proj, x, y = kwargs['dims_shape'] + dtype = kwargs['dtype'] + return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 + +@pytest.mark.cupy +@pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000]) +@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem]) +def test_generic_calculate_max_slices_iterative( + mocker: MockerFixture, dummy_block: DataSetBlock, available_memory: int, memcalc_fn: Callable +): + class FakeModule: + def test_method(data): + return data + + mocker.patch( + "httomo.method_wrappers.generic.import_module", return_value=FakeModule + ) + + memory_gpu = GpuMemoryRequirement(multiplier=None, method="iterative") + repo = make_mock_repo( + mocker, + pattern=Pattern.projection, + output_dims_change=True, + implementation="gpu_cupy", + memory_gpu=memory_gpu, + ) + + memcalc_mock = mocker.patch.object( + repo.query("", ""), "calculate_memory_bytes_for_slices", side_effect=memcalc_fn + ) + wrp = 
make_method_wrapper( + repo, + "mocked_module_path", + "test_method", + MPI.COMM_WORLD, + make_mock_preview_config(mocker), + ) + shape_t = list(dummy_block.chunk_shape) + shape_t.pop(0) + shape = (shape_t[0], shape_t[1]) + max_slices, _ = wrp.calculate_max_slices( + dummy_block.data.dtype, + shape, + available_memory, + ) + + if memcalc_mock(dims_shape=(1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory: + # If zero slice fits + assert max_slices == 0 + else: + # Computed slices must fit in the available memory + assert memcalc_mock(dims_shape=(max_slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] <= available_memory + + # And one more slice must not fit + assert memcalc_mock(dims_shape=(max_slices + 1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory + @pytest.mark.cupy def test_generic_calculate_output_dims(mocker: MockerFixture): From b425d47e22afeb09d43912d2f298ed714a89a339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Thu, 4 Dec 2025 10:52:14 +0100 Subject: [PATCH 2/9] Catch exceptions from memory estimator Exceptions can be thrown, e.g. when the estimation itself fails due to running out of memory. A test is also added to cover this case. 
--- httomo/method_wrappers/generic.py | 11 +++++++---- tests/method_wrappers/test_generic.py | 26 +++++++++++++++++++------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 90e882b98..20e08d614 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -466,10 +466,13 @@ def calculate_max_slices( def _calculate_max_slices_iterative(self, data_dtype: np.dtype, non_slice_dims_shape: Tuple[int, int], available_memory: int) -> int: def get_mem_bytes(current_slices): - memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]), - dtype=data_dtype, - **self._unwrap_output_ref_values()) - return memory_bytes + try: + memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]), + dtype=data_dtype, + **self._unwrap_output_ref_values()) + return memory_bytes + except: + return 2**64 # Find a number of slices that does not fit current_slices = 100 diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index ef8876db2..14cbce57b 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -703,9 +703,17 @@ def _quadratic_mem(*args, **kwargs): dtype = kwargs['dtype'] return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 +THROW_OVER_SLICES = 77 +def _quadratic_mem_throws(*args, **kwargs): + proj, x, y = kwargs['dims_shape'] + dtype = kwargs['dtype'] + if proj > THROW_OVER_SLICES: + raise Exception('Memory estimator failed') + return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 + @pytest.mark.cupy @pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000]) -@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem]) +@pytest.mark.parametrize("memcalc_fn", 
[_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws]) def test_generic_calculate_max_slices_iterative( mocker: MockerFixture, dummy_block: DataSetBlock, available_memory: int, memcalc_fn: Callable ): @@ -745,16 +753,20 @@ def test_method(data): available_memory, ) - if memcalc_mock(dims_shape=(1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory: + check_slices = lambda slices: memcalc_mock(dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] + if check_slices(1) > available_memory: # If zero slice fits assert max_slices == 0 else: # Computed slices must fit in the available memory - assert memcalc_mock(dims_shape=(max_slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] <= available_memory - - # And one more slice must not fit - assert memcalc_mock(dims_shape=(max_slices + 1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory - + assert check_slices(max_slices) <= available_memory + + if memcalc_fn == _quadratic_mem_throws and max_slices + 1 >= THROW_OVER_SLICES: + with pytest.raises(Exception): + check_slices(max_slices + 1) + else: + # And one more slice must not fit + assert check_slices(max_slices + 1) > available_memory @pytest.mark.cupy def test_generic_calculate_output_dims(mocker: MockerFixture): From 0d7a40374ddd2a082021420d376dbaba1ec705ee Mon Sep 17 00:00:00 2001 From: algol Date: Thu, 4 Dec 2025 11:28:27 +0000 Subject: [PATCH 3/9] linting applied --- httomo/method_wrappers/generic.py | 30 ++++++++++++++----- tests/method_wrappers/test_generic.py | 42 ++++++++++++++++++--------- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 20e08d614..4533e31bb 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -448,7 +448,12 @@ def calculate_max_slices( * data_dtype.itemsize ) elif self.memory_gpu.method == "iterative": - return 
self._calculate_max_slices_iterative(data_dtype, non_slice_dims_shape, available_memory), available_memory + return ( + self._calculate_max_slices_iterative( + data_dtype, non_slice_dims_shape, available_memory + ), + available_memory, + ) else: ( memory_bytes_method, @@ -464,12 +469,23 @@ def calculate_max_slices( available_memory - subtract_bytes ) // memory_bytes_method, available_memory - def _calculate_max_slices_iterative(self, data_dtype: np.dtype, non_slice_dims_shape: Tuple[int, int], available_memory: int) -> int: + def _calculate_max_slices_iterative( + self, + data_dtype: np.dtype, + non_slice_dims_shape: Tuple[int, int], + available_memory: int, + ) -> int: def get_mem_bytes(current_slices): try: - memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]), - dtype=data_dtype, - **self._unwrap_output_ref_values()) + memory_bytes, _ = self._query.calculate_memory_bytes_for_slices( + dims_shape=( + current_slices, + non_slice_dims_shape[0], + non_slice_dims_shape[1], + ), + dtype=data_dtype, + **self._unwrap_output_ref_values(), + ) return memory_bytes except: return 2**64 @@ -491,7 +507,7 @@ def get_mem_bytes(current_slices): current_slices *= 2 slices_high = current_slices - # Binary search between low and high + # Binary search between low and high slices_low = 0 while slices_high - slices_low > 1: current_slices = (slices_low + slices_high) // 2 @@ -500,7 +516,7 @@ def get_mem_bytes(current_slices): slices_high = current_slices elif memory_bytes < available_memory: slices_low = current_slices - else: # memory_bytes == available_memory + else: # memory_bytes == available_memory return current_slices return slices_low diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index 14cbce57b..d0352de03 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -688,34 +688,47 @@ def test_method(data): 
assert max_slices > dummy_block.chunk_shape[0] assert available_memory == 1_000_000_000 + def _linear_mem(*args, **kwargs): - proj, x, y = kwargs['dims_shape'] - dtype = kwargs['dtype'] + proj, x, y = kwargs["dims_shape"] + dtype = kwargs["dtype"] return proj * x * y * dtype.itemsize, 0 + def _linear_offset_mem(*args, **kwargs): - proj, x, y = kwargs['dims_shape'] - dtype = kwargs['dtype'] - return (x * y + proj * x * y + proj * x ** 2) * dtype.itemsize, 0 + proj, x, y = kwargs["dims_shape"] + dtype = kwargs["dtype"] + return (x * y + proj * x * y + proj * x**2) * dtype.itemsize, 0 + def _quadratic_mem(*args, **kwargs): - proj, x, y = kwargs['dims_shape'] - dtype = kwargs['dtype'] + proj, x, y = kwargs["dims_shape"] + dtype = kwargs["dtype"] return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 + THROW_OVER_SLICES = 77 + + def _quadratic_mem_throws(*args, **kwargs): - proj, x, y = kwargs['dims_shape'] - dtype = kwargs['dtype'] + proj, x, y = kwargs["dims_shape"] + dtype = kwargs["dtype"] if proj > THROW_OVER_SLICES: - raise Exception('Memory estimator failed') + raise Exception("Memory estimator failed") return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 + @pytest.mark.cupy @pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000]) -@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws]) +@pytest.mark.parametrize( + "memcalc_fn", + [_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws], +) def test_generic_calculate_max_slices_iterative( - mocker: MockerFixture, dummy_block: DataSetBlock, available_memory: int, memcalc_fn: Callable + mocker: MockerFixture, + dummy_block: DataSetBlock, + available_memory: int, + memcalc_fn: Callable, ): class FakeModule: def test_method(data): @@ -753,7 +766,9 @@ def test_method(data): available_memory, ) - check_slices = lambda slices: memcalc_mock(dims_shape=(slices, shape[0], shape[1]), 
dtype=dummy_block.data.dtype)[0] + check_slices = lambda slices: memcalc_mock( + dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype + )[0] if check_slices(1) > available_memory: # If zero slice fits assert max_slices == 0 @@ -768,6 +783,7 @@ def test_method(data): # And one more slice must not fit assert check_slices(max_slices + 1) > available_memory + @pytest.mark.cupy def test_generic_calculate_output_dims(mocker: MockerFixture): class FakeModule: From a4d3cac23c63eca3e486cda11fcf001f17c2cb6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Thu, 4 Dec 2025 16:34:44 +0100 Subject: [PATCH 4/9] Change iterative mem. est. interface not to return subtract_bytes --- httomo/method_wrappers/generic.py | 2 +- httomo/runner/methods_repository_interface.py | 4 ++-- tests/method_wrappers/test_generic.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 4533e31bb..934750770 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -477,7 +477,7 @@ def _calculate_max_slices_iterative( ) -> int: def get_mem_bytes(current_slices): try: - memory_bytes, _ = self._query.calculate_memory_bytes_for_slices( + memory_bytes = self._query.calculate_memory_bytes_for_slices( dims_shape=( current_slices, non_slice_dims_shape[0], diff --git a/httomo/runner/methods_repository_interface.py b/httomo/runner/methods_repository_interface.py index 2fe70469d..75d3a5a56 100644 --- a/httomo/runner/methods_repository_interface.py +++ b/httomo/runner/methods_repository_interface.py @@ -52,8 +52,8 @@ def calculate_memory_bytes( def calculate_memory_bytes_for_slices( self, dims_shape: Tuple[int, int, int], dtype: np.dtype, **kwargs - ) -> Tuple[int, int]: - """Calculate the memory required in bytes, returning bytes method and subtract bytes tuple""" + ) -> int: + """Calculate the memory required in bytes for a given 3D 
grid""" ... # pragma: no cover def calculate_output_dims( diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index d0352de03..6a76cbd11 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -692,19 +692,19 @@ def test_method(data): def _linear_mem(*args, **kwargs): proj, x, y = kwargs["dims_shape"] dtype = kwargs["dtype"] - return proj * x * y * dtype.itemsize, 0 + return proj * x * y * dtype.itemsize def _linear_offset_mem(*args, **kwargs): proj, x, y = kwargs["dims_shape"] dtype = kwargs["dtype"] - return (x * y + proj * x * y + proj * x**2) * dtype.itemsize, 0 + return (x * y + proj * x * y + proj * x**2) * dtype.itemsize def _quadratic_mem(*args, **kwargs): proj, x, y = kwargs["dims_shape"] dtype = kwargs["dtype"] - return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 + return (4 * x * y + proj * proj * x * y) * dtype.itemsize THROW_OVER_SLICES = 77 @@ -715,7 +715,7 @@ def _quadratic_mem_throws(*args, **kwargs): dtype = kwargs["dtype"] if proj > THROW_OVER_SLICES: raise Exception("Memory estimator failed") - return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0 + return (4 * x * y + proj * proj * x * y) * dtype.itemsize @pytest.mark.cupy @@ -768,7 +768,7 @@ def test_method(data): check_slices = lambda slices: memcalc_mock( dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype - )[0] + ) if check_slices(1) > available_memory: # If zero slice fits assert max_slices == 0 From 01645118e63839e4e0c61b8ba2e8f5ef745c08db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Mon, 8 Dec 2025 17:11:45 +0100 Subject: [PATCH 5/9] Fix GPU mem usage in iterative memory estimation --- httomo/method_wrappers/generic.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 934750770..f9998293f 100644 --- 
a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -448,12 +448,16 @@ def calculate_max_slices( * data_dtype.itemsize ) elif self.memory_gpu.method == "iterative": - return ( - self._calculate_max_slices_iterative( - data_dtype, non_slice_dims_shape, available_memory - ), - available_memory, - ) + # The iterative method may use the GPU + assert gpu_enabled, "GPU method used on a system without GPU support" + with xp.cuda.Device(self._gpu_id): + gpumem_cleanup() + return ( + self._calculate_max_slices_iterative( + data_dtype, non_slice_dims_shape, available_memory + ), + available_memory, + ) else: ( memory_bytes_method, @@ -489,6 +493,8 @@ def get_mem_bytes(current_slices): return memory_bytes except: return 2**64 + finally: + gpumem_cleanup() # Find a number of slices that does not fit current_slices = 100 @@ -498,7 +504,7 @@ def get_mem_bytes(current_slices): slices_high = current_slices else: # linear approximation - current_slices = (available_memory + memory_bytes - 1) // memory_bytes + current_slices = int(current_slices * available_memory / memory_bytes) while True: memory_bytes = get_mem_bytes(current_slices) if memory_bytes > available_memory: From 43b46cace3113050f440cb2cf48a89df643a0da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Wed, 10 Dec 2025 16:25:32 +0100 Subject: [PATCH 6/9] Reduce the number of required iterations by introducing a "good enough" threshold --- httomo/method_wrappers/generic.py | 14 +++++++++++--- tests/method_wrappers/test_generic.py | 5 +++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index f9998293f..9a25bb898 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -479,6 +479,8 @@ def _calculate_max_slices_iterative( non_slice_dims_shape: Tuple[int, int], available_memory: int, ) -> int: + MEM_RATIO_THRESHOLD = 0.9 + def 
get_mem_bytes(current_slices): try: memory_bytes = self._query.calculate_memory_bytes_for_slices( @@ -501,6 +503,7 @@ def get_mem_bytes(current_slices): slices_high = None memory_bytes = get_mem_bytes(current_slices) if memory_bytes > available_memory: + # Found upper limit, continue to binary search slices_high = current_slices else: # linear approximation @@ -508,7 +511,12 @@ def get_mem_bytes(current_slices): while True: memory_bytes = get_mem_bytes(current_slices) if memory_bytes > available_memory: + # Found upper limit, continue to binary search break + elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: + # This is "good enough", return + return current_slices + # If linear approximation is not enough, just double every iteration current_slices *= 2 slices_high = current_slices @@ -520,10 +528,10 @@ def get_mem_bytes(current_slices): memory_bytes = get_mem_bytes(current_slices) if memory_bytes > available_memory: slices_high = current_slices - elif memory_bytes < available_memory: - slices_low = current_slices - else: # memory_bytes == available_memory + elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: return current_slices + else: + slices_low = current_slices return slices_low diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index 6a76cbd11..251b4f558 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -769,6 +769,7 @@ def test_method(data): check_slices = lambda slices: memcalc_mock( dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype ) + threshold = 0.9 if check_slices(1) > available_memory: # If zero slice fits assert max_slices == 0 @@ -780,8 +781,8 @@ def test_method(data): with pytest.raises(Exception): check_slices(max_slices + 1) else: - # And one more slice must not fit - assert check_slices(max_slices + 1) > available_memory + # And one more slice must not fit OR above threshold + assert check_slices(max_slices + 1) > 
available_memory or check_slices(max_slices) >= available_memory * threshold @pytest.mark.cupy From 5bd55e3656403de1f0c4b65552b4dfe6c06bb26b Mon Sep 17 00:00:00 2001 From: algol Date: Tue, 16 Dec 2025 17:27:43 +0000 Subject: [PATCH 7/9] linting --- httomo/method_wrappers/generic.py | 4 ++-- tests/method_wrappers/test_generic.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 9a25bb898..855dd3cd2 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -479,7 +479,7 @@ def _calculate_max_slices_iterative( non_slice_dims_shape: Tuple[int, int], available_memory: int, ) -> int: - MEM_RATIO_THRESHOLD = 0.9 + MEM_RATIO_THRESHOLD = 0.9 # 90% of the used device memory is the target def get_mem_bytes(current_slices): try: @@ -516,7 +516,7 @@ def get_mem_bytes(current_slices): elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: # This is "good enough", return return current_slices - + # If linear approximation is not enough, just double every iteration current_slices *= 2 slices_high = current_slices diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index 251b4f558..dad9ff70c 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -782,7 +782,10 @@ def test_method(data): check_slices(max_slices + 1) else: # And one more slice must not fit OR above threshold - assert check_slices(max_slices + 1) > available_memory or check_slices(max_slices) >= available_memory * threshold + assert ( + check_slices(max_slices + 1) > available_memory + or check_slices(max_slices) >= available_memory * threshold + ) @pytest.mark.cupy From 6bd7039596df82c4efad645d0284b5f01998ea31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Wed, 17 Dec 2025 09:54:30 +0100 Subject: [PATCH 8/9] Use iterative Paganin memory estimator in sweep runner --- 
httomo/method_wrappers/generic.py | 48 ++++--------------- httomo/sweep_runner/param_sweep_runner.py | 21 ++++++--- httomo/utils.py | 57 +++++++++++++++++++++++ 3 files changed, 80 insertions(+), 46 deletions(-) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 855dd3cd2..ffe5dfade 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -14,7 +14,14 @@ MethodRepository, ) from httomo.runner.output_ref import OutputRef -from httomo.utils import catch_gputime, catchtime, gpu_enabled, log_rank, xp +from httomo.utils import ( + catch_gputime, + catchtime, + gpu_enabled, + log_rank, + xp, + search_max_slices_iterative, +) import numpy as np @@ -479,8 +486,6 @@ def _calculate_max_slices_iterative( non_slice_dims_shape: Tuple[int, int], available_memory: int, ) -> int: - MEM_RATIO_THRESHOLD = 0.9 # 90% of the used device memory is the target - def get_mem_bytes(current_slices): try: memory_bytes = self._query.calculate_memory_bytes_for_slices( @@ -498,42 +503,7 @@ def get_mem_bytes(current_slices): finally: gpumem_cleanup() - # Find a number of slices that does not fit - current_slices = 100 - slices_high = None - memory_bytes = get_mem_bytes(current_slices) - if memory_bytes > available_memory: - # Found upper limit, continue to binary search - slices_high = current_slices - else: - # linear approximation - current_slices = int(current_slices * available_memory / memory_bytes) - while True: - memory_bytes = get_mem_bytes(current_slices) - if memory_bytes > available_memory: - # Found upper limit, continue to binary search - break - elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: - # This is "good enough", return - return current_slices - - # If linear approximation is not enough, just double every iteration - current_slices *= 2 - slices_high = current_slices - - # Binary search between low and high - slices_low = 0 - while slices_high - slices_low > 1: - current_slices = (slices_low + 
slices_high) // 2 - memory_bytes = get_mem_bytes(current_slices) - if memory_bytes > available_memory: - slices_high = current_slices - elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: - return current_slices - else: - slices_low = current_slices - - return slices_low + return search_max_slices_iterative(available_memory, get_mem_bytes) def _unwrap_output_ref_values(self) -> Dict[str, Any]: """ diff --git a/httomo/sweep_runner/param_sweep_runner.py b/httomo/sweep_runner/param_sweep_runner.py index c453c79b9..f200c5617 100644 --- a/httomo/sweep_runner/param_sweep_runner.py +++ b/httomo/sweep_runner/param_sweep_runner.py @@ -16,12 +16,12 @@ from httomo.sweep_runner.param_sweep_block import ParamSweepBlock from httomo.sweep_runner.side_output_manager import SideOutputManager from httomo.sweep_runner.stages import NonSweepStage, Stages, SweepStage -from httomo.utils import catchtime, log_exception, log_once -from httomo.runner.gpu_utils import get_available_gpu_memory +from httomo.utils import catchtime, log_exception, log_once, search_max_slices_iterative +from httomo.runner.gpu_utils import get_available_gpu_memory, gpumem_cleanup from httomo.preview import PreviewConfig, PreviewDimConfig from httomo.runner.dataset_store_interfaces import DataSetSource from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.phase import ( - _calc_memory_bytes_paganin_filter, + _calc_memory_bytes_for_slices_paganin_filter, ) @@ -322,8 +322,15 @@ def _slices_to_fit_memory_Paganin(source: DataSetSource) -> int: angles_total = source.aux_data.angles_length det_X_length = source.chunk_shape[2] - (memory_bytes_method, subtract_bytes) = _calc_memory_bytes_paganin_filter( - (angles_total, det_X_length), dtype=np.float32() - ) + def get_mem_bytes(slices): + try: + return _calc_memory_bytes_for_slices_paganin_filter( + (slices, angles_total, det_X_length), dtype=np.float32() + ) + except: + return 2**64 + finally: + gpumem_cleanup() - return 
(available_memory - subtract_bytes) // memory_bytes_method + gpumem_cleanup() + return search_max_slices_iterative(available_memory, get_mem_bytes) diff --git a/httomo/utils.py b/httomo/utils.py index 26436760a..585db502f 100644 --- a/httomo/utils.py +++ b/httomo/utils.py @@ -299,3 +299,60 @@ def mpi_abort_excepthook(type, value, traceback): log_rank("\n".join(format_tb(traceback)), MPI.COMM_WORLD) MPI.COMM_WORLD.Abort() sys.__excepthook__(type, value, traceback) + + +def search_max_slices_iterative( + available_memory: int, get_mem_bytes: Callable[[int], int] +) -> int: + """ + Approximates the maximum number of fitting slices to the GPU memory for a given function. + The memory profile of the function must be increasing in the function of the number of slices. + First, a linear approximation of the memory profile is performed. If this is not accurate enough, + a binary search follows to determine the number of fitting slices. This function never returns a + number of slices for what `get_mem_bytes(slices) > available_memory`. + + :param available_memory: Bytes of available device memory + :type available_memory: int + :param get_mem_bytes: A functor that produces the bytes of device memory needed for a given number of slices. + :type get_mem_bytes: Callable[[int], int] + :return: Returns the approximation of the maximum number of fitting slices. 
+ :rtype: int + """ + MEM_RATIO_THRESHOLD = 0.9 # 90% of the used device memory is the target + + # Find a number of slices that does not fit + current_slices = 100 + slices_high = None + memory_bytes = get_mem_bytes(current_slices) + if memory_bytes > available_memory: + # Found upper limit, continue to binary search + slices_high = current_slices + else: + # linear approximation + current_slices = int(current_slices * available_memory / memory_bytes) + while True: + memory_bytes = get_mem_bytes(current_slices) + if memory_bytes > available_memory: + # Found upper limit, continue to binary search + break + elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: + # This is "good enough", return + return current_slices + + # If linear approximation is not enough, just double every iteration + current_slices *= 2 + slices_high = current_slices + + # Binary search between low and high + slices_low = 0 + while slices_high - slices_low > 1: + current_slices = (slices_low + slices_high) // 2 + memory_bytes = get_mem_bytes(current_slices) + if memory_bytes > available_memory: + slices_high = current_slices + elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD: + return current_slices + else: + slices_low = current_slices + + return slices_low From be1e8c0c32a986280a3a7b9a7c86c04422227fd2 Mon Sep 17 00:00:00 2001 From: algol Date: Thu, 18 Dec 2025 11:10:21 +0000 Subject: [PATCH 9/9] docs correction --- docs/source/pipelines/yaml.rst | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/source/pipelines/yaml.rst b/docs/source/pipelines/yaml.rst index 5c499ae3d..408ad3899 100644 --- a/docs/source/pipelines/yaml.rst +++ b/docs/source/pipelines/yaml.rst @@ -58,19 +58,6 @@ Those pipelines consist of methods from HTTomolibgpu (GPU) and HTTomolib (CPU) b .. literalinclude:: ../pipelines_full/FISTA3d_tomobar.yaml :language: yaml -.. 
_tutorials_pl_templates_cpu: - -Pipelines using TomoPy library ------------------------------- - -One can build CPU-only pipelines by using mostly TomoPy methods. They are expected to be slower than the pipelines above. - -.. dropdown:: CPU pipeline using auto-centering and the gridrec reconstruction method from TomoPy. - - .. literalinclude:: ../pipelines_full/tomopy_gridrec.yaml - :language: yaml - - .. _tutorials_pl_templates_dls: DLS-specific pipelines @@ -95,17 +82,31 @@ Pipelines with parameter sweeps Here we demonstrate how to perform a sweep across multiple values of a single parameter (see :ref:`parameter_sweeping` for more details). -.. note:: There is no need to add image saving plugin for sweep runs as it will be added automatically. It is also preferable to keep the `preview` small as the time of computation can be substantial. +.. note:: There is no need to add image saving plugin for sweep runs as it will be added automatically. .. dropdown:: Parameter sweep using the :code:`!SweepRange` tag to do a sweep over several CoR values of the :code:`center` parameter in the reconstruction method. .. literalinclude:: ../pipelines_full/sweep_center_FBP3d_tomobar.yaml :language: yaml - :emphasize-lines: 34-37 + :emphasize-lines: 36-39 .. dropdown:: Parameter sweep using the :code:`!Sweep` tag over several particular values (not a range) of the :code:`ratio_delta_beta` parameter for the Paganin filter. .. literalinclude:: ../pipelines_full/sweep_paganin_FBP3d_tomobar.yaml :language: yaml - :emphasize-lines: 53-56 + :emphasize-lines: 51-54 + +.. _tutorials_pl_templates_cpu: + +Pipelines using TomoPy library +------------------------------ + +One can build CPU-only pipelines by using mostly TomoPy methods. + +.. note:: Methods from TomoPy are expected to be slower than the GPU-accelerated methods from the libraries above. + +.. dropdown:: CPU pipeline using auto-centering and the gridrec reconstruction method from TomoPy. + + .. 
literalinclude:: ../pipelines_full/tomopy_gridrec.yaml + :language: yaml \ No newline at end of file