Skip to content
33 changes: 17 additions & 16 deletions docs/source/pipelines/yaml.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,6 @@ Those pipelines consist of methods from HTTomolibgpu (GPU) and HTTomolib (CPU) b
.. literalinclude:: ../pipelines_full/FISTA3d_tomobar.yaml
:language: yaml

.. _tutorials_pl_templates_cpu:

Pipelines using TomoPy library
------------------------------

One can build CPU-only pipelines by using mostly TomoPy methods. They are expected to be slower than the pipelines above.

.. dropdown:: CPU pipeline using auto-centering and the gridrec reconstruction method from TomoPy.

.. literalinclude:: ../pipelines_full/tomopy_gridrec.yaml
:language: yaml


.. _tutorials_pl_templates_dls:

DLS-specific pipelines
Expand All @@ -95,17 +82,31 @@ Pipelines with parameter sweeps

Here we demonstrate how to perform a sweep across multiple values of a single parameter (see :ref:`parameter_sweeping` for more details).

.. note:: There is no need to add image saving plugin for sweep runs as it will be added automatically. It is also preferable to keep the `preview` small as the time of computation can be substantial.
.. note:: There is no need to add image saving plugin for sweep runs as it will be added automatically.

.. dropdown:: Parameter sweep using the :code:`!SweepRange` tag to do a sweep over several CoR values of the :code:`center` parameter in the reconstruction method.

.. literalinclude:: ../pipelines_full/sweep_center_FBP3d_tomobar.yaml
:language: yaml
:emphasize-lines: 34-37
:emphasize-lines: 36-39

.. dropdown:: Parameter sweep using the :code:`!Sweep` tag over several particular values (not a range) of the :code:`ratio_delta_beta` parameter for the Paganin filter.

.. literalinclude:: ../pipelines_full/sweep_paganin_FBP3d_tomobar.yaml
:language: yaml
:emphasize-lines: 53-56
:emphasize-lines: 51-54


.. _tutorials_pl_templates_cpu:

Pipelines using TomoPy library
------------------------------

One can build CPU-only pipelines by using mostly TomoPy methods.

.. note:: Methods from TomoPy are expected to be slower than the GPU-accelerated methods from the libraries above.

.. dropdown:: CPU pipeline using auto-centering and the gridrec reconstruction method from TomoPy.

.. literalinclude:: ../pipelines_full/tomopy_gridrec.yaml
:language: yaml
45 changes: 44 additions & 1 deletion httomo/method_wrappers/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@
MethodRepository,
)
from httomo.runner.output_ref import OutputRef
from httomo.utils import catch_gputime, catchtime, gpu_enabled, log_rank, xp
from httomo.utils import (
catch_gputime,
catchtime,
gpu_enabled,
log_rank,
xp,
search_max_slices_iterative,
)


import numpy as np
Expand Down Expand Up @@ -447,6 +454,17 @@ def calculate_max_slices(
* np.prod(non_slice_dims_shape)
* data_dtype.itemsize
)
elif self.memory_gpu.method == "iterative":
# The iterative method may use the GPU
assert gpu_enabled, "GPU method used on a system without GPU support"
with xp.cuda.Device(self._gpu_id):
gpumem_cleanup()
return (
self._calculate_max_slices_iterative(
data_dtype, non_slice_dims_shape, available_memory
),
available_memory,
)
else:
(
memory_bytes_method,
Expand All @@ -462,6 +480,31 @@ def calculate_max_slices(
available_memory - subtract_bytes
) // memory_bytes_method, available_memory

def _calculate_max_slices_iterative(
self,
data_dtype: np.dtype,
non_slice_dims_shape: Tuple[int, int],
available_memory: int,
) -> int:
def get_mem_bytes(current_slices):
try:
memory_bytes = self._query.calculate_memory_bytes_for_slices(
dims_shape=(
current_slices,
non_slice_dims_shape[0],
non_slice_dims_shape[1],
),
dtype=data_dtype,
**self._unwrap_output_ref_values(),
)
return memory_bytes
except:
return 2**64
finally:
gpumem_cleanup()

return search_max_slices_iterative(available_memory, get_mem_bytes)

def _unwrap_output_ref_values(self) -> Dict[str, Any]:
"""
Iterate through params in `self.config_params` and, for any value of type `OutputRef`,
Expand Down
6 changes: 6 additions & 0 deletions httomo/runner/methods_repository_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ def calculate_memory_bytes(
"""Calculate the memory required in bytes, returning bytes method and subtract bytes tuple"""
... # pragma: no cover

def calculate_memory_bytes_for_slices(
self, dims_shape: Tuple[int, int, int], dtype: np.dtype, **kwargs
) -> int:
"""Calculate the memory required in bytes for a given 3D grid"""
... # pragma: no cover

def calculate_output_dims(
self, non_slice_dims_shape: Tuple[int, int], **kwargs
) -> Tuple[int, int]:
Expand Down
21 changes: 14 additions & 7 deletions httomo/sweep_runner/param_sweep_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
from httomo.sweep_runner.param_sweep_block import ParamSweepBlock
from httomo.sweep_runner.side_output_manager import SideOutputManager
from httomo.sweep_runner.stages import NonSweepStage, Stages, SweepStage
from httomo.utils import catchtime, log_exception, log_once
from httomo.runner.gpu_utils import get_available_gpu_memory
from httomo.utils import catchtime, log_exception, log_once, search_max_slices_iterative
from httomo.runner.gpu_utils import get_available_gpu_memory, gpumem_cleanup
from httomo.preview import PreviewConfig, PreviewDimConfig
from httomo.runner.dataset_store_interfaces import DataSetSource
from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.phase import (
_calc_memory_bytes_paganin_filter,
_calc_memory_bytes_for_slices_paganin_filter,
)


Expand Down Expand Up @@ -322,8 +322,15 @@ def _slices_to_fit_memory_Paganin(source: DataSetSource) -> int:
angles_total = source.aux_data.angles_length
det_X_length = source.chunk_shape[2]

(memory_bytes_method, subtract_bytes) = _calc_memory_bytes_paganin_filter(
(angles_total, det_X_length), dtype=np.float32()
)
def get_mem_bytes(slices):
try:
return _calc_memory_bytes_for_slices_paganin_filter(
(slices, angles_total, det_X_length), dtype=np.float32()
)
except:
return 2**64
finally:
gpumem_cleanup()

return (available_memory - subtract_bytes) // memory_bytes_method
gpumem_cleanup()
return search_max_slices_iterative(available_memory, get_mem_bytes)
57 changes: 57 additions & 0 deletions httomo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,60 @@ def mpi_abort_excepthook(type, value, traceback):
log_rank("\n".join(format_tb(traceback)), MPI.COMM_WORLD)
MPI.COMM_WORLD.Abort()
sys.__excepthook__(type, value, traceback)


def search_max_slices_iterative(
available_memory: int, get_mem_bytes: Callable[[int], int]
) -> int:
"""
Approximates the maximum number of fitting slices to the GPU memory for a given function.
The memory profile of the function must be increasing in the function of the number of slices.
First, a linear approximation of the memory profile is performed. If this is not accurate enough,
a binary search follows to determine the number of fitting slices. This function never returns a
number of slices for what `get_mem_bytes(slices) > available_memory`.

:param available_memory: Bytes of available device memory
:type available_memory: int
:param get_mem_bytes: A functor that produces the bytes of device memory needed for a given number of slices.
:type get_mem_bytes: Callable[[int], int]
:return: Returns the approximation of the maximum number of fitting slices.
:rtype: int
"""
MEM_RATIO_THRESHOLD = 0.9 # 90% of the used device memory is the target

# Find a number of slices that does not fit
current_slices = 100
slices_high = None
memory_bytes = get_mem_bytes(current_slices)
if memory_bytes > available_memory:
# Found upper limit, continue to binary search
slices_high = current_slices
else:
# linear approximation
current_slices = int(current_slices * available_memory / memory_bytes)
while True:
memory_bytes = get_mem_bytes(current_slices)
if memory_bytes > available_memory:
# Found upper limit, continue to binary search
break
elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
# This is "good enough", return
return current_slices

# If linear approximation is not enough, just double every iteration
current_slices *= 2
slices_high = current_slices

# Binary search between low and high
slices_low = 0
while slices_high - slices_low > 1:
current_slices = (slices_low + slices_high) // 2
memory_bytes = get_mem_bytes(current_slices)
if memory_bytes > available_memory:
slices_high = current_slices
elif memory_bytes >= available_memory * MEM_RATIO_THRESHOLD:
return current_slices
else:
slices_low = current_slices

return slices_low
101 changes: 100 additions & 1 deletion tests/method_wrappers/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional, Union
from typing import Callable, List, Optional, Union

import numpy as np
from httomo.method_wrappers import make_method_wrapper
Expand Down Expand Up @@ -689,6 +689,105 @@ def test_method(data):
assert available_memory == 1_000_000_000


def _linear_mem(*args, **kwargs):
proj, x, y = kwargs["dims_shape"]
dtype = kwargs["dtype"]
return proj * x * y * dtype.itemsize


def _linear_offset_mem(*args, **kwargs):
proj, x, y = kwargs["dims_shape"]
dtype = kwargs["dtype"]
return (x * y + proj * x * y + proj * x**2) * dtype.itemsize


def _quadratic_mem(*args, **kwargs):
proj, x, y = kwargs["dims_shape"]
dtype = kwargs["dtype"]
return (4 * x * y + proj * proj * x * y) * dtype.itemsize


THROW_OVER_SLICES = 77


def _quadratic_mem_throws(*args, **kwargs):
proj, x, y = kwargs["dims_shape"]
dtype = kwargs["dtype"]
if proj > THROW_OVER_SLICES:
raise Exception("Memory estimator failed")
return (4 * x * y + proj * proj * x * y) * dtype.itemsize


@pytest.mark.cupy
@pytest.mark.parametrize("available_memory", [0, 1_000, 1_000_000, 1_000_000_000])
@pytest.mark.parametrize(
"memcalc_fn",
[_linear_mem, _linear_offset_mem, _quadratic_mem, _quadratic_mem_throws],
)
def test_generic_calculate_max_slices_iterative(
mocker: MockerFixture,
dummy_block: DataSetBlock,
available_memory: int,
memcalc_fn: Callable,
):
class FakeModule:
def test_method(data):
return data

mocker.patch(
"httomo.method_wrappers.generic.import_module", return_value=FakeModule
)

memory_gpu = GpuMemoryRequirement(multiplier=None, method="iterative")
repo = make_mock_repo(
mocker,
pattern=Pattern.projection,
output_dims_change=True,
implementation="gpu_cupy",
memory_gpu=memory_gpu,
)

memcalc_mock = mocker.patch.object(
repo.query("", ""), "calculate_memory_bytes_for_slices", side_effect=memcalc_fn
)
wrp = make_method_wrapper(
repo,
"mocked_module_path",
"test_method",
MPI.COMM_WORLD,
make_mock_preview_config(mocker),
)
shape_t = list(dummy_block.chunk_shape)
shape_t.pop(0)
shape = (shape_t[0], shape_t[1])
max_slices, _ = wrp.calculate_max_slices(
dummy_block.data.dtype,
shape,
available_memory,
)

check_slices = lambda slices: memcalc_mock(
dims_shape=(slices, shape[0], shape[1]), dtype=dummy_block.data.dtype
)
threshold = 0.9
if check_slices(1) > available_memory:
# If zero slice fits
assert max_slices == 0
else:
# Computed slices must fit in the available memory
assert check_slices(max_slices) <= available_memory

if memcalc_fn == _quadratic_mem_throws and max_slices + 1 >= THROW_OVER_SLICES:
with pytest.raises(Exception):
check_slices(max_slices + 1)
else:
# And one more slice must not fit OR above threshold
assert (
check_slices(max_slices + 1) > available_memory
or check_slices(max_slices) >= available_memory * threshold
)


@pytest.mark.cupy
def test_generic_calculate_output_dims(mocker: MockerFixture):
class FakeModule:
Expand Down