From 672d23bc9c8dff0c914fb4030a9adf5722cb4bc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?=
 <lorinc.serfozo@radwaytech.com>
Date: Tue, 18 Nov 2025 14:06:44 +0100
Subject: [PATCH 1/9] Iterative search method for memory estimator with test

---
 httomo/method_wrappers/generic.py             | 40 +++++++++++
 httomo/runner/methods_repository_interface.py |  6 ++
 tests/method_wrappers/test_generic.py         | 69 ++++++++++++++++++-
 3 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 1aa59deaf..90e882b98 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -447,6 +447,8 @@ def calculate_max_slices(
                 * np.prod(non_slice_dims_shape)
                 * data_dtype.itemsize
             )
+        elif self.memory_gpu.method == "iterative":
+            return self._calculate_max_slices_iterative(data_dtype, non_slice_dims_shape, available_memory), available_memory
         else:
             (
                 memory_bytes_method,
@@ -462,6 +464,44 @@ def calculate_max_slices(
             available_memory - subtract_bytes
         ) // memory_bytes_method, available_memory
 
+    def _calculate_max_slices_iterative(self, data_dtype: np.dtype, non_slice_dims_shape: Tuple[int, int], available_memory: int) -> int:
+        def get_mem_bytes(current_slices):
+            memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]),
+                                                                            dtype=data_dtype,
+                                                                            **self._unwrap_output_ref_values())
+            return memory_bytes
+
+        # Find a number of slices that does not fit
+        current_slices = 100
+        slices_high = None
+        memory_bytes = get_mem_bytes(current_slices)
+        if memory_bytes > available_memory:
+            slices_high = current_slices
+        else:
+            # linear approximation
+            current_slices = (available_memory + memory_bytes - 1) // memory_bytes
+            while True:
+                memory_bytes = get_mem_bytes(current_slices)
+                if memory_bytes > available_memory:
+                    break
+                # If linear approximation is not enough, just double every iteration
+                current_slices *= 2
+            slices_high = current_slices
+
+        # Binary search between low and high 
+        slices_low = 0
+        while slices_high - slices_low > 1:
+            current_slices = (slices_low + slices_high) // 2
+            memory_bytes = get_mem_bytes(current_slices)
+            if memory_bytes > available_memory:
+                slices_high = current_slices
+            elif memory_bytes < available_memory:
+                slices_low = current_slices
+            else: # memory_bytes == available_memory
+                return current_slices
+
+        return slices_low
+
     def _unwrap_output_ref_values(self) -> Dict[str, Any]:
         """
         Iterate through params in `self.config_params` and, for any value of type `OutputRef`,
diff --git a/httomo/runner/methods_repository_interface.py b/httomo/runner/methods_repository_interface.py
index 88be412b8..2fe70469d 100644
--- a/httomo/runner/methods_repository_interface.py
+++ b/httomo/runner/methods_repository_interface.py
@@ -50,6 +50,12 @@ def calculate_memory_bytes(
         """Calculate the memory required in bytes, returning bytes method and subtract bytes tuple"""
         ...  # pragma: no cover
 
+    def calculate_memory_bytes_for_slices(
+        self, dims_shape: Tuple[int, int, int], dtype: np.dtype, **kwargs
+    ) -> Tuple[int, int]:
+        """Calculate the memory required in bytes, returning bytes method and subtract bytes tuple"""
+        ...  # pragma: no cover
+
     def calculate_output_dims(
         self, non_slice_dims_shape: Tuple[int, int], **kwargs
     ) -> Tuple[int, int]:
diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py
index e62fe0425..ef8876db2 100644
--- a/tests/method_wrappers/test_generic.py
+++ b/tests/method_wrappers/test_generic.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union
+from typing import Callable, List, Optional, Union
 
 import numpy as np
 from httomo.method_wrappers import make_method_wrapper
@@ -688,6 +688,73 @@ def test_method(data):
         assert max_slices > dummy_block.chunk_shape[0]
         assert available_memory == 1_000_000_000
 
+def _linear_mem(*args, **kwargs):
+    proj, x, y = kwargs['dims_shape']
+    dtype = kwargs['dtype']
+    return proj * x * y * dtype.itemsize, 0
+
+def _linear_offset_mem(*args, **kwargs):
+    proj, x, y = kwargs['dims_shape']
+    dtype = kwargs['dtype']
+    return (x * y + proj * x * y + proj * x ** 2) * dtype.itemsize, 0
+
+def _quadratic_mem(*args, **kwargs):
+    proj, x, y = kwargs['dims_shape']
+    dtype = kwargs['dtype']
+    return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
+
+@pytest.mark.cupy
+@pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000])
+@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem])
+def test_generic_calculate_max_slices_iterative(
+    mocker: MockerFixture, dummy_block: DataSetBlock, available_memory: int, memcalc_fn: Callable
+):
+    class FakeModule:
+        def test_method(data):
+            return data
+
+    mocker.patch(
+        "httomo.method_wrappers.generic.import_module", return_value=FakeModule
+    )
+
+    memory_gpu = GpuMemoryRequirement(multiplier=None, method="iterative")
+    repo = make_mock_repo(
+        mocker,
+        pattern=Pattern.projection,
+        output_dims_change=True,
+        implementation="gpu_cupy",
+        memory_gpu=memory_gpu,
+    )
+
+    memcalc_mock = mocker.patch.object(
+        repo.query("", ""), "calculate_memory_bytes_for_slices", side_effect=memcalc_fn
+    )
+    wrp = make_method_wrapper(
+        repo,
+        "mocked_module_path",
+        "test_method",
+        MPI.COMM_WORLD,
+        make_mock_preview_config(mocker),
+    )
+    shape_t = list(dummy_block.chunk_shape)
+    shape_t.pop(0)
+    shape = (shape_t[0], shape_t[1])
+    max_slices, _ = wrp.calculate_max_slices(
+        dummy_block.data.dtype,
+        shape,
+        available_memory,
+    )
+
+    if memcalc_mock(dims_shape=(1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory:
+        # If zero slice fits
+        assert max_slices == 0
+    else:
+        # Computed slices must fit in the available memory
+        assert memcalc_mock(dims_shape=(max_slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] <= available_memory
+
+        # And one more slice must not fit
+        assert memcalc_mock(dims_shape=(max_slices + 1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory
+
 
 @pytest.mark.cupy
 def test_generic_calculate_output_dims(mocker: MockerFixture):

From b425d47e22afeb09d43912d2f298ed714a89a339 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?=
 <lorinc.serfozo@radwaytech.com>
Date: Thu, 4 Dec 2025 10:52:14 +0100
Subject: [PATCH 2/9] Catch exceptions from memory estimator

Exceptions can be thrown, e.g. when the estimation itself fails due to running out of memory. A test is also added to cover this case.
---
 httomo/method_wrappers/generic.py     | 11 +++++++----
 tests/method_wrappers/test_generic.py | 26 +++++++++++++++++++-------
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 90e882b98..20e08d614 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -466,10 +466,13 @@ def calculate_max_slices(
 
     def _calculate_max_slices_iterative(self, data_dtype: np.dtype, non_slice_dims_shape: Tuple[int, int], available_memory: int) -> int:
         def get_mem_bytes(current_slices):
-            memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]),
-                                                                            dtype=data_dtype,
-                                                                            **self._unwrap_output_ref_values())
-            return memory_bytes
+            try:
+                memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]),
+                                                                                dtype=data_dtype,
+                                                                                **self._unwrap_output_ref_values())
+                return memory_bytes
+            except:
+                return 2**64
 
         # Find a number of slices that does not fit
         current_slices = 100
diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py
index ef8876db2..14cbce57b 100644
--- a/tests/method_wrappers/test_generic.py
+++ b/tests/method_wrappers/test_generic.py
@@ -703,9 +703,17 @@ def _quadratic_mem(*args, **kwargs):
     dtype = kwargs['dtype']
     return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
 
+THROW_OVER_SLICES = 77
+def _quadratic_mem_throws(*args, **kwargs):
+    proj, x, y = kwargs['dims_shape']
+    dtype = kwargs['dtype']
+    if proj > THROW_OVER_SLICES:
+        raise Exception('Memory estimator failed')
+    return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
+
 @pytest.mark.cupy
 @pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000])
-@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem])
+@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws])
 def test_generic_calculate_max_slices_iterative(
     mocker: MockerFixture, dummy_block: DataSetBlock, available_memory: int, memcalc_fn: Callable
 ):
@@ -745,16 +753,20 @@ def test_method(data):
         available_memory,
     )
 
-    if memcalc_mock(dims_shape=(1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory:
+    check_slices = lambda slices: memcalc_mock(dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0]
+    if check_slices(1) > available_memory:
         # If zero slice fits
         assert max_slices == 0
     else:
         # Computed slices must fit in the available memory
-        assert memcalc_mock(dims_shape=(max_slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] <= available_memory
-
-        # And one more slice must not fit
-        assert memcalc_mock(dims_shape=(max_slices + 1, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0] > available_memory
-
+        assert check_slices(max_slices) <= available_memory
+
+        if memcalc_fn == _quadratic_mem_throws and max_slices + 1 >= THROW_OVER_SLICES:
+            with pytest.raises(Exception):
+                check_slices(max_slices + 1)
+        else:
+            # And one more slice must not fit
+            assert check_slices(max_slices + 1) > available_memory
 
 @pytest.mark.cupy
 def test_generic_calculate_output_dims(mocker: MockerFixture):

From 0d7a40374ddd2a082021420d376dbaba1ec705ee Mon Sep 17 00:00:00 2001
From: algol <dkazanc@hotmail.com>
Date: Thu, 4 Dec 2025 11:28:27 +0000
Subject: [PATCH 3/9] linting applied

---
 httomo/method_wrappers/generic.py     | 30 ++++++++++++++-----
 tests/method_wrappers/test_generic.py | 42 ++++++++++++++++++---------
 2 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 20e08d614..4533e31bb 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -448,7 +448,12 @@ def calculate_max_slices(
                 * data_dtype.itemsize
             )
         elif self.memory_gpu.method == "iterative":
-            return self._calculate_max_slices_iterative(data_dtype, non_slice_dims_shape, available_memory), available_memory
+            return (
+                self._calculate_max_slices_iterative(
+                    data_dtype, non_slice_dims_shape, available_memory
+                ),
+                available_memory,
+            )
         else:
             (
                 memory_bytes_method,
@@ -464,12 +469,23 @@ def calculate_max_slices(
             available_memory - subtract_bytes
         ) // memory_bytes_method, available_memory
 
-    def _calculate_max_slices_iterative(self, data_dtype: np.dtype, non_slice_dims_shape: Tuple[int, int], available_memory: int) -> int:
+    def _calculate_max_slices_iterative(
+        self,
+        data_dtype: np.dtype,
+        non_slice_dims_shape: Tuple[int, int],
+        available_memory: int,
+    ) -> int:
         def get_mem_bytes(current_slices):
             try:
-                memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(dims_shape=(current_slices, non_slice_dims_shape[0], non_slice_dims_shape[1]),
-                                                                                dtype=data_dtype,
-                                                                                **self._unwrap_output_ref_values())
+                memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(
+                    dims_shape=(
+                        current_slices,
+                        non_slice_dims_shape[0],
+                        non_slice_dims_shape[1],
+                    ),
+                    dtype=data_dtype,
+                    **self._unwrap_output_ref_values(),
+                )
                 return memory_bytes
             except:
                 return 2**64
@@ -491,7 +507,7 @@ def get_mem_bytes(current_slices):
                 current_slices *= 2
             slices_high = current_slices
 
-        # Binary search between low and high 
+        # Binary search between low and high
         slices_low = 0
         while slices_high - slices_low > 1:
             current_slices = (slices_low + slices_high) // 2
@@ -500,7 +516,7 @@ def get_mem_bytes(current_slices):
                 slices_high = current_slices
             elif memory_bytes < available_memory:
                 slices_low = current_slices
-            else: # memory_bytes == available_memory
+            else:  # memory_bytes == available_memory
                 return current_slices
 
         return slices_low
diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py
index 14cbce57b..d0352de03 100644
--- a/tests/method_wrappers/test_generic.py
+++ b/tests/method_wrappers/test_generic.py
@@ -688,34 +688,47 @@ def test_method(data):
         assert max_slices > dummy_block.chunk_shape[0]
         assert available_memory == 1_000_000_000
 
+
 def _linear_mem(*args, **kwargs):
-    proj, x, y = kwargs['dims_shape']
-    dtype = kwargs['dtype']
+    proj, x, y = kwargs["dims_shape"]
+    dtype = kwargs["dtype"]
     return proj * x * y * dtype.itemsize, 0
 
+
 def _linear_offset_mem(*args, **kwargs):
-    proj, x, y = kwargs['dims_shape']
-    dtype = kwargs['dtype']
-    return (x * y + proj * x * y + proj * x ** 2) * dtype.itemsize, 0
+    proj, x, y = kwargs["dims_shape"]
+    dtype = kwargs["dtype"]
+    return (x * y + proj * x * y + proj * x**2) * dtype.itemsize, 0
+
 
 def _quadratic_mem(*args, **kwargs):
-    proj, x, y = kwargs['dims_shape']
-    dtype = kwargs['dtype']
+    proj, x, y = kwargs["dims_shape"]
+    dtype = kwargs["dtype"]
     return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
 
+
 THROW_OVER_SLICES = 77
+
+
 def _quadratic_mem_throws(*args, **kwargs):
-    proj, x, y = kwargs['dims_shape']
-    dtype = kwargs['dtype']
+    proj, x, y = kwargs["dims_shape"]
+    dtype = kwargs["dtype"]
     if proj > THROW_OVER_SLICES:
-        raise Exception('Memory estimator failed')
+        raise Exception("Memory estimator failed")
     return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
 
+
 @pytest.mark.cupy
 @pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000])
-@pytest.mark.parametrize("memcalc_fn", [_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws])
+@pytest.mark.parametrize(
+    "memcalc_fn",
+    [_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws],
+)
 def test_generic_calculate_max_slices_iterative(
-    mocker: MockerFixture, dummy_block: DataSetBlock, available_memory: int, memcalc_fn: Callable
+    mocker: MockerFixture,
+    dummy_block: DataSetBlock,
+    available_memory: int,
+    memcalc_fn: Callable,
 ):
     class FakeModule:
         def test_method(data):
@@ -753,7 +766,9 @@ def test_method(data):
         available_memory,
     )
 
-    check_slices = lambda slices: memcalc_mock(dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype)[0]
+    check_slices = lambda slices: memcalc_mock(
+        dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype
+    )[0]
     if check_slices(1) > available_memory:
         # If zero slice fits
         assert max_slices == 0
@@ -768,6 +783,7 @@ def test_method(data):
             # And one more slice must not fit
             assert check_slices(max_slices + 1) > available_memory
 
+
 @pytest.mark.cupy
 def test_generic_calculate_output_dims(mocker: MockerFixture):
     class FakeModule:

From a4d3cac23c63eca3e486cda11fcf001f17c2cb6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?=
 <lorinc.serfozo@radwaytech.com>
Date: Thu, 4 Dec 2025 16:34:44 +0100
Subject: [PATCH 4/9] Change iterative mem. est. interface not to return
 subtract_bytes

---
 httomo/method_wrappers/generic.py             |  2 +-
 httomo/runner/methods_repository_interface.py |  4 ++--
 tests/method_wrappers/test_generic.py         | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 4533e31bb..934750770 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -477,7 +477,7 @@ def _calculate_max_slices_iterative(
     ) -> int:
         def get_mem_bytes(current_slices):
             try:
-                memory_bytes, _ = self._query.calculate_memory_bytes_for_slices(
+                memory_bytes = self._query.calculate_memory_bytes_for_slices(
                     dims_shape=(
                         current_slices,
                         non_slice_dims_shape[0],
diff --git a/httomo/runner/methods_repository_interface.py b/httomo/runner/methods_repository_interface.py
index 2fe70469d..75d3a5a56 100644
--- a/httomo/runner/methods_repository_interface.py
+++ b/httomo/runner/methods_repository_interface.py
@@ -52,8 +52,8 @@ def calculate_memory_bytes(
 
     def calculate_memory_bytes_for_slices(
         self, dims_shape: Tuple[int, int, int], dtype: np.dtype, **kwargs
-    ) -> Tuple[int, int]:
-        """Calculate the memory required in bytes, returning bytes method and subtract bytes tuple"""
+    ) -> int:
+        """Calculate the memory required in bytes for a given 3D grid"""
         ...  # pragma: no cover
 
     def calculate_output_dims(
diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py
index d0352de03..6a76cbd11 100644
--- a/tests/method_wrappers/test_generic.py
+++ b/tests/method_wrappers/test_generic.py
@@ -692,19 +692,19 @@ def test_method(data):
 def _linear_mem(*args, **kwargs):
     proj, x, y = kwargs["dims_shape"]
     dtype = kwargs["dtype"]
-    return proj * x * y * dtype.itemsize, 0
+    return proj * x * y * dtype.itemsize
 
 
 def _linear_offset_mem(*args, **kwargs):
     proj, x, y = kwargs["dims_shape"]
     dtype = kwargs["dtype"]
-    return (x * y + proj * x * y + proj * x**2) * dtype.itemsize, 0
+    return (x * y + proj * x * y + proj * x**2) * dtype.itemsize
 
 
 def _quadratic_mem(*args, **kwargs):
     proj, x, y = kwargs["dims_shape"]
     dtype = kwargs["dtype"]
-    return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
+    return (4 * x * y + proj * proj * x * y) * dtype.itemsize
 
 
 THROW_OVER_SLICES = 77
@@ -715,7 +715,7 @@ def _quadratic_mem_throws(*args, **kwargs):
     dtype = kwargs["dtype"]
     if proj > THROW_OVER_SLICES:
         raise Exception("Memory estimator failed")
-    return (4 * x * y + proj * proj * x * y) * dtype.itemsize, 0
+    return (4 * x * y + proj * proj * x * y) * dtype.itemsize
 
 
 @pytest.mark.cupy
@@ -768,7 +768,7 @@ def test_method(data):
 
     check_slices = lambda slices: memcalc_mock(
         dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype
-    )[0]
+    )
     if check_slices(1) > available_memory:
         # If zero slice fits
         assert max_slices == 0

From 01645118e63839e4e0c61b8ba2e8f5ef745c08db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?=
 <lorinc.serfozo@radwaytech.com>
Date: Mon, 8 Dec 2025 17:11:45 +0100
Subject: [PATCH 5/9] Fix GPU mem usage in iterative memory estimation

---
 httomo/method_wrappers/generic.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 934750770..f9998293f 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -448,12 +448,16 @@ def calculate_max_slices(
                 * data_dtype.itemsize
             )
         elif self.memory_gpu.method == "iterative":
-            return (
-                self._calculate_max_slices_iterative(
-                    data_dtype, non_slice_dims_shape, available_memory
-                ),
-                available_memory,
-            )
+            # The iterative method may use the GPU
+            assert gpu_enabled, "GPU method used on a system without GPU support"
+            with xp.cuda.Device(self._gpu_id):
+                gpumem_cleanup()
+                return (
+                    self._calculate_max_slices_iterative(
+                        data_dtype, non_slice_dims_shape, available_memory
+                    ),
+                    available_memory,
+                )
         else:
             (
                 memory_bytes_method,
@@ -489,6 +493,8 @@ def get_mem_bytes(current_slices):
                 return memory_bytes
             except:
                 return 2**64
+            finally:
+                gpumem_cleanup()
 
         # Find a number of slices that does not fit
         current_slices = 100
@@ -498,7 +504,7 @@ def get_mem_bytes(current_slices):
             slices_high = current_slices
         else:
             # linear approximation
-            current_slices = (available_memory + memory_bytes - 1) // memory_bytes
+            current_slices = int(current_slices * available_memory / memory_bytes)
             while True:
                 memory_bytes = get_mem_bytes(current_slices)
                 if memory_bytes > available_memory:

From 43b46cace3113050f440cb2cf48a89df643a0da2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?=
 <lorinc.serfozo@radwaytech.com>
Date: Wed, 10 Dec 2025 16:25:32 +0100
Subject: [PATCH 6/9] Reduce the number of required iterations by introducing a
 "good enough" threshold

---
 httomo/method_wrappers/generic.py     | 14 +++++++++++---
 tests/method_wrappers/test_generic.py |  5 +++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index f9998293f..9a25bb898 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -479,6 +479,8 @@ def _calculate_max_slices_iterative(
         non_slice_dims_shape: Tuple[int, int],
         available_memory: int,
     ) -> int:
+        MEM_RATIO_THRESHOLD = 0.9
+
         def get_mem_bytes(current_slices):
             try:
                 memory_bytes = self._query.calculate_memory_bytes_for_slices(
@@ -501,6 +503,7 @@ def get_mem_bytes(current_slices):
         slices_high = None
         memory_bytes = get_mem_bytes(current_slices)
         if memory_bytes > available_memory:
+            # Found upper limit, continue to binary search
             slices_high = current_slices
         else:
             # linear approximation
@@ -508,7 +511,12 @@ def get_mem_bytes(current_slices):
             while True:
                 memory_bytes = get_mem_bytes(current_slices)
                 if memory_bytes > available_memory:
+                    # Found upper limit, continue to binary search
                     break
+                elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
+                    # This is "good enough", return
+                    return current_slices
+            
                 # If linear approximation is not enough, just double every iteration
                 current_slices *= 2
             slices_high = current_slices
@@ -520,10 +528,10 @@ def get_mem_bytes(current_slices):
             memory_bytes = get_mem_bytes(current_slices)
             if memory_bytes > available_memory:
                 slices_high = current_slices
-            elif memory_bytes < available_memory:
-                slices_low = current_slices
-            else:  # memory_bytes == available_memory
+            elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
                 return current_slices
+            else:
+                slices_low = current_slices
 
         return slices_low
 
diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py
index 6a76cbd11..251b4f558 100644
--- a/tests/method_wrappers/test_generic.py
+++ b/tests/method_wrappers/test_generic.py
@@ -769,6 +769,7 @@ def test_method(data):
     check_slices = lambda slices: memcalc_mock(
         dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype
     )
+    threshold = 0.9
     if check_slices(1) > available_memory:
         # If zero slice fits
         assert max_slices == 0
@@ -780,8 +781,8 @@ def test_method(data):
             with pytest.raises(Exception):
                 check_slices(max_slices + 1)
         else:
-            # And one more slice must not fit
-            assert check_slices(max_slices + 1) > available_memory
+            # And one more slice must not fit OR above threshold
+            assert check_slices(max_slices + 1) > available_memory or check_slices(max_slices) >= available_memory * threshold
 
 
 @pytest.mark.cupy

From 5bd55e3656403de1f0c4b65552b4dfe6c06bb26b Mon Sep 17 00:00:00 2001
From: algol <dkazanc@hotmail.com>
Date: Tue, 16 Dec 2025 17:27:43 +0000
Subject: [PATCH 7/9] linting

---
 httomo/method_wrappers/generic.py     | 4 ++--
 tests/method_wrappers/test_generic.py | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 9a25bb898..855dd3cd2 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -479,7 +479,7 @@ def _calculate_max_slices_iterative(
         non_slice_dims_shape: Tuple[int, int],
         available_memory: int,
     ) -> int:
-        MEM_RATIO_THRESHOLD = 0.9
+        MEM_RATIO_THRESHOLD = 0.9 # 90% of the used device memory is the target
 
         def get_mem_bytes(current_slices):
             try:
@@ -516,7 +516,7 @@ def get_mem_bytes(current_slices):
                 elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
                     # This is "good enough", return
                     return current_slices
-            
+
                 # If linear approximation is not enough, just double every iteration
                 current_slices *= 2
             slices_high = current_slices
diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py
index 251b4f558..dad9ff70c 100644
--- a/tests/method_wrappers/test_generic.py
+++ b/tests/method_wrappers/test_generic.py
@@ -782,7 +782,10 @@ def test_method(data):
                 check_slices(max_slices + 1)
         else:
             # And one more slice must not fit OR above threshold
-            assert check_slices(max_slices + 1) > available_memory or check_slices(max_slices) >= available_memory * threshold
+            assert (
+                check_slices(max_slices + 1) > available_memory
+                or check_slices(max_slices) >= available_memory * threshold
+            )
 
 
 @pytest.mark.cupy

From 6bd7039596df82c4efad645d0284b5f01998ea31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?=
 <lorinc.serfozo@radwaytech.com>
Date: Wed, 17 Dec 2025 09:54:30 +0100
Subject: [PATCH 8/9] Use iterative Paganin memory estimator in sweep runner

---
 httomo/method_wrappers/generic.py         | 48 ++++---------------
 httomo/sweep_runner/param_sweep_runner.py | 21 ++++++---
 httomo/utils.py                           | 57 +++++++++++++++++++++++
 3 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py
index 855dd3cd2..ffe5dfade 100644
--- a/httomo/method_wrappers/generic.py
+++ b/httomo/method_wrappers/generic.py
@@ -14,7 +14,14 @@
     MethodRepository,
 )
 from httomo.runner.output_ref import OutputRef
-from httomo.utils import catch_gputime, catchtime, gpu_enabled, log_rank, xp
+from httomo.utils import (
+    catch_gputime,
+    catchtime,
+    gpu_enabled,
+    log_rank,
+    xp,
+    search_max_slices_iterative,
+)
 
 
 import numpy as np
@@ -479,8 +486,6 @@ def _calculate_max_slices_iterative(
         non_slice_dims_shape: Tuple[int, int],
         available_memory: int,
     ) -> int:
-        MEM_RATIO_THRESHOLD = 0.9 # 90% of the used device memory is the target
-
         def get_mem_bytes(current_slices):
             try:
                 memory_bytes = self._query.calculate_memory_bytes_for_slices(
@@ -498,42 +503,7 @@ def get_mem_bytes(current_slices):
             finally:
                 gpumem_cleanup()
 
-        # Find a number of slices that does not fit
-        current_slices = 100
-        slices_high = None
-        memory_bytes = get_mem_bytes(current_slices)
-        if memory_bytes > available_memory:
-            # Found upper limit, continue to binary search
-            slices_high = current_slices
-        else:
-            # linear approximation
-            current_slices = int(current_slices * available_memory / memory_bytes)
-            while True:
-                memory_bytes = get_mem_bytes(current_slices)
-                if memory_bytes > available_memory:
-                    # Found upper limit, continue to binary search
-                    break
-                elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
-                    # This is "good enough", return
-                    return current_slices
-
-                # If linear approximation is not enough, just double every iteration
-                current_slices *= 2
-            slices_high = current_slices
-
-        # Binary search between low and high
-        slices_low = 0
-        while slices_high - slices_low > 1:
-            current_slices = (slices_low + slices_high) // 2
-            memory_bytes = get_mem_bytes(current_slices)
-            if memory_bytes > available_memory:
-                slices_high = current_slices
-            elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
-                return current_slices
-            else:
-                slices_low = current_slices
-
-        return slices_low
+        return search_max_slices_iterative(available_memory, get_mem_bytes)
 
     def _unwrap_output_ref_values(self) -> Dict[str, Any]:
         """
diff --git a/httomo/sweep_runner/param_sweep_runner.py b/httomo/sweep_runner/param_sweep_runner.py
index c453c79b9..f200c5617 100644
--- a/httomo/sweep_runner/param_sweep_runner.py
+++ b/httomo/sweep_runner/param_sweep_runner.py
@@ -16,12 +16,12 @@
 from httomo.sweep_runner.param_sweep_block import ParamSweepBlock
 from httomo.sweep_runner.side_output_manager import SideOutputManager
 from httomo.sweep_runner.stages import NonSweepStage, Stages, SweepStage
-from httomo.utils import catchtime, log_exception, log_once
-from httomo.runner.gpu_utils import get_available_gpu_memory
+from httomo.utils import catchtime, log_exception, log_once, search_max_slices_iterative
+from httomo.runner.gpu_utils import get_available_gpu_memory, gpumem_cleanup
 from httomo.preview import PreviewConfig, PreviewDimConfig
 from httomo.runner.dataset_store_interfaces import DataSetSource
 from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.phase import (
-    _calc_memory_bytes_paganin_filter,
+    _calc_memory_bytes_for_slices_paganin_filter,
 )
 
 
@@ -322,8 +322,15 @@ def _slices_to_fit_memory_Paganin(source: DataSetSource) -> int:
     angles_total = source.aux_data.angles_length
     det_X_length = source.chunk_shape[2]
 
-    (memory_bytes_method, subtract_bytes) = _calc_memory_bytes_paganin_filter(
-        (angles_total, det_X_length), dtype=np.float32()
-    )
+    def get_mem_bytes(slices):  # probe callback: memory estimate for `slices` slices
+        try:
+            return _calc_memory_bytes_for_slices_paganin_filter(
+                (slices, angles_total, det_X_length), dtype=np.float32()
+            )
+        except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must propagate
+            return 2**64  # failed probe -> huge requirement, so the search sees "does not fit"
+        finally:
+            gpumem_cleanup()
 
-    return (available_memory - subtract_bytes) // memory_bytes_method
+    gpumem_cleanup()
+    return search_max_slices_iterative(available_memory, get_mem_bytes)
diff --git a/httomo/utils.py b/httomo/utils.py
index 26436760a..585db502f 100644
--- a/httomo/utils.py
+++ b/httomo/utils.py
@@ -299,3 +299,60 @@ def mpi_abort_excepthook(type, value, traceback):
     log_rank("\n".join(format_tb(traceback)), MPI.COMM_WORLD)
     MPI.COMM_WORLD.Abort()
     sys.__excepthook__(type, value, traceback)
+
+
+def search_max_slices_iterative(
+    available_memory: int, get_mem_bytes: Callable[[int], int]
+) -> int:
+    """
+    Approximates the maximum number of slices that fit into the GPU memory for a given function.
+    The memory profile of the function must be increasing as a function of the number of slices.
+    First, a linear extrapolation from a probe of 100 slices is tried. If this is not accurate enough,
+    a binary search follows to determine the number of fitting slices. This function never returns a
+    number of slices for which `get_mem_bytes(slices) > available_memory`.
+
+    :param available_memory: Bytes of available device memory
+    :type available_memory: int
+    :param get_mem_bytes: A functor that produces the bytes of device memory needed for a given number of slices.
+    :type get_mem_bytes: Callable[[int], int]
+    :return: Returns the approximation of the maximum number of fitting slices.
+    :rtype: int
+    :raises ValueError: If the probe of 100 slices reports a non-positive number of bytes.
+    """
+    MEM_RATIO_THRESHOLD = 0.9  # a count using >= 90% of the available memory is "good enough"
+
+    # Find a number of slices that does not fit
+    current_slices = 100
+    memory_bytes = get_mem_bytes(current_slices)
+    if memory_bytes > available_memory:
+        # Found upper limit, continue to binary search
+        slices_high = current_slices
+    elif memory_bytes <= 0:  # the extrapolation below would divide by zero or never terminate
+        raise ValueError(f"Non-positive memory estimate for {current_slices} slices: {memory_bytes}")
+    else:
+        # linear approximation
+        current_slices = int(current_slices * available_memory / memory_bytes)
+        while True:
+            memory_bytes = get_mem_bytes(current_slices)
+            if memory_bytes > available_memory:
+                break  # Found upper limit, continue to binary search
+            elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
+                # This is "good enough", return
+                return current_slices
+            # If linear approximation is not enough, just double every iteration
+            current_slices *= 2
+        slices_high = current_slices
+
+    # Binary search between low and high
+    slices_low = 0
+    while slices_high - slices_low > 1:
+        current_slices = (slices_low + slices_high) // 2
+        memory_bytes = get_mem_bytes(current_slices)
+        if memory_bytes > available_memory:
+            slices_high = current_slices
+        elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
+            return current_slices
+        else:
+            slices_low = current_slices
+
+    return slices_low

From be1e8c0c32a986280a3a7b9a7c86c04422227fd2 Mon Sep 17 00:00:00 2001
From: algol <dkazanc@hotmail.com>
Date: Thu, 18 Dec 2025 11:10:21 +0000
Subject: [PATCH 9/9] docs correction

---
 docs/source/pipelines/yaml.rst | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/docs/source/pipelines/yaml.rst b/docs/source/pipelines/yaml.rst
index 5c499ae3d..408ad3899 100644
--- a/docs/source/pipelines/yaml.rst
+++ b/docs/source/pipelines/yaml.rst
@@ -58,19 +58,6 @@ Those pipelines consist of methods from HTTomolibgpu (GPU) and HTTomolib (CPU) b
     .. literalinclude:: ../pipelines_full/FISTA3d_tomobar.yaml
         :language: yaml
 
-.. _tutorials_pl_templates_cpu:
-
-Pipelines using TomoPy library
-------------------------------
-
-One can build CPU-only pipelines by using mostly TomoPy methods. They are expected to be slower than the pipelines above.
-
-.. dropdown:: CPU pipeline using auto-centering and the gridrec reconstruction method from TomoPy.
-
-    .. literalinclude:: ../pipelines_full/tomopy_gridrec.yaml
-        :language: yaml
-
-
 .. _tutorials_pl_templates_dls:
 
 DLS-specific pipelines
@@ -95,17 +82,31 @@ Pipelines with parameter sweeps
 
 Here we demonstrate how to perform a sweep across multiple values of a single parameter (see :ref:`parameter_sweeping` for more details).
 
-.. note::  There is no need to add image saving plugin for sweep runs as it will be added automatically. It is also preferable to keep the `preview` small as the time of computation can be substantial.
+.. note::  There is no need to add an image-saving plugin for sweep runs, as it is added automatically.
 
 .. dropdown:: Parameter sweep using the :code:`!SweepRange` tag to do a sweep over several CoR values of the :code:`center` parameter in the reconstruction method. 
 
    .. literalinclude:: ../pipelines_full/sweep_center_FBP3d_tomobar.yaml
        :language: yaml
-       :emphasize-lines: 34-37
+       :emphasize-lines: 36-39
 
 .. dropdown:: Parameter sweep using the :code:`!Sweep` tag over several particular values (not a range) of the :code:`ratio_delta_beta` parameter for the Paganin filter. 
 
    .. literalinclude:: ../pipelines_full/sweep_paganin_FBP3d_tomobar.yaml
        :language: yaml
-       :emphasize-lines: 53-56
+       :emphasize-lines: 51-54
             
+
+.. _tutorials_pl_templates_cpu:
+
+Pipelines using TomoPy library
+------------------------------
+
+One can build CPU-only pipelines by using mostly TomoPy methods. 
+
+.. note::  Methods from TomoPy are expected to be slower than the GPU-accelerated methods from the libraries above.
+
+.. dropdown:: CPU pipeline using auto-centering and the gridrec reconstruction method from TomoPy.
+
+    .. literalinclude:: ../pipelines_full/tomopy_gridrec.yaml
+        :language: yaml
\ No newline at end of file