From fab6b0fc9d92385f9f9037ce05627426a01f85b0 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 17 Nov 2025 15:07:35 +0100 Subject: [PATCH 01/20] Remove redundant CUDA synchronization --- src/iterative.F90 | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/iterative.F90 b/src/iterative.F90 index 0eeb599..06910ed 100644 --- a/src/iterative.F90 +++ b/src/iterative.F90 @@ -1846,9 +1846,6 @@ subroutine calculate_transmissions_and_dos(negf, Ec, SelfEneR, GS, tun_proj, tun call calculate_Gr_tridiag_blocks(negf,ESH,gsmr,Gr,1) call calculate_Gr_tridiag_blocks(negf,ESH,gsmr,Gr,2,nbl) -#:if defined("GPU") - call waitForGPU() -#:endif !Computation of transmission(s) between contacts ni(:) -> nf(:) #:if defined("GPU") call waitForGPU() From d9c145696a589e8e2e02feb8524ae76264401014 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 17 Nov 2025 15:40:20 +0100 Subject: [PATCH 02/20] Ensure asynchronous GPU code has finished --- src/iterative.F90 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/iterative.F90 b/src/iterative.F90 index 06910ed..506af78 100644 --- a/src/iterative.F90 +++ b/src/iterative.F90 @@ -1741,6 +1741,9 @@ subroutine calculate_transmissions(negf, Ec, SelfEneR, tun_proj, tun_mat) if (nt.gt.1) then call calculate_Gr_tridiag_blocks(negf,ESH,gsmr,Gr,2,nt) end if +#:if defined("GPU") + call waitForGPU() +#:endif else ! When more contacts are present sometimes we can re-use previous GF ! 
if nt1 > nt extend the Gr calculation @@ -1749,6 +1752,9 @@ subroutine calculate_transmissions(negf, Ec, SelfEneR, tun_proj, tun_mat) nt = nt1 endif end if +#:if defined("GPU") + call waitForGPU() +#:endif call calculate_single_transmission_N_contacts(negf,nit,nft,ESH,SelfEneR,cblk,negf%tun_proj,gsmr,Gr,tun) From dcb8fe871827a23be004a6e157f3fc98aa20fa79 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 17 Nov 2025 18:59:49 +0100 Subject: [PATCH 03/20] Add a cmake-format configuration file --- .cmake-format.py | 241 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 .cmake-format.py diff --git a/.cmake-format.py b/.cmake-format.py new file mode 100644 index 0000000..e688e0c --- /dev/null +++ b/.cmake-format.py @@ -0,0 +1,241 @@ +# ---------------------------------- +# Options affecting listfile parsing +# ---------------------------------- +with section("parse"): + + # Specify structure for custom cmake functions + additional_commands = { 'foo': { 'flags': ['BAR', 'BAZ'], + 'kwargs': {'DEPENDS': '*', 'HEADERS': '*', 'SOURCES': '*'}}} + + # Override configurations per-command where available + override_spec = {} + + # Specify variable tags. + vartags = [] + + # Specify property tags. + proptags = [] + +# ----------------------------- +# Options affecting formatting. +# ----------------------------- +with section("format"): + + # Disable formatting entirely, making cmake-format a no-op + disable = False + + # How wide to allow formatted cmake files + line_width = 100 + + # How many spaces to tab for indent + tab_size = 2 + + # If true, lines are indented using tab characters (utf-8 0x09) instead of + # space characters (utf-8 0x20). 
In cases where the layout would + # require a fractional tab character, the behavior of the fractional + # indentation is governed by `fractional_tab_policy` + use_tabchars = False + + # If `use_tabchars` is True, then the value of this variable indicates how + # fractional indentions are handled during whitespace replacement. If set to + # 'use-space', fractional indentation is left as spaces (utf-8 0x20). If set + # to `round-up` fractional indentation is replaced with a single tab character + # (utf-8 0x09) effectively shifting the column to the next tabstop + fractional_tab_policy = 'use-space' + + # If an argument group contains more than this many sub-groups (parg or kwarg + # groups) then force it to a vertical layout. + max_subgroups_hwrap = 2 + + # If a positional argument group contains more than this many arguments, then + # force it to a vertical layout. + max_pargs_hwrap = 6 + + # If a cmdline positional group consumes more than this many lines without + # nesting, then invalidate the layout (and nest) + max_rows_cmdline = 2 + + # If true, separate flow control names from their parentheses with a space + separate_ctrl_name_with_space = False + + # If true, separate function names from parentheses with a space + separate_fn_name_with_space = False + + # If a statement is wrapped to more than one line, then dangle the closing + # parenthesis on its own line. + dangle_parens = True + + # If the trailing parenthesis must be 'dangled' on its own line, then align it + # to this reference: `prefix`: the start of the statement, `prefix-indent`: + # the start of the statement, plus one indentation level, `child`: align to + # the column of the arguments + dangle_align = 'prefix' + + # If the statement spelling length (including space and parenthesis) is + # smaller than this amount, then force reject nested layouts.
+ min_prefix_chars = 4 + + # If the statement spelling length (including space and parenthesis) is larger + # than the tab width by more than this amount, then force reject un-nested + # layouts. + max_prefix_chars = 10 + + # If a candidate layout is wrapped horizontally but it exceeds this many + # lines, then reject the layout. + max_lines_hwrap = 2 + + # What style line endings to use in the output. + line_ending = 'unix' + + # Format command names consistently as 'lower' or 'upper' case + command_case = 'canonical' + + # Format keywords consistently as 'lower' or 'upper' case + keyword_case = 'unchanged' + + # A list of command names which should always be wrapped + always_wrap = [] + + # If true, the argument lists which are known to be sortable will be sorted + # lexicographically + enable_sort = True + + # If true, the parsers may infer whether or not an argument list is sortable + # (without annotation). + autosort = False + + # By default, if cmake-format cannot successfully fit everything into the + # desired linewidth it will apply the last, most aggressive attempt that it + # made. If this flag is True, however, cmake-format will print an error, exit + # with non-zero status code, and write-out nothing + require_valid_layout = False + + # A dictionary mapping layout nodes to a list of wrap decisions. See the + # documentation for more information. + layout_passes = {} + +# ------------------------------------------------ +# Options affecting comment reflow and formatting. +# ------------------------------------------------ +with section("markup"): + + # What character to use for bulleted lists + bullet_char = '*' + + # What character to use as punctuation after numerals in an enumerated list + enum_char = '.' + + # If comment markup is enabled, don't reflow the first comment block in each + # listfile. Use this to preserve formatting of your copyright/license + # statements.
+ first_comment_is_literal = False + + # If comment markup is enabled, don't reflow any comment block which matches + # this (regex) pattern. Default is `None` (disabled). + literal_comment_pattern = None + + # Regular expression to match preformat fences in comments default= + # ``r'^\s*([`~]{3}[`~]*)(.*)$'`` + fence_pattern = '^\\s*([`~]{3}[`~]*)(.*)$' + + # Regular expression to match rulers in comments default= + # ``r'^\s*[^\w\s]{3}.*[^\w\s]{3}$'`` + ruler_pattern = '^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$' + + # If a comment line matches starts with this pattern then it is explicitly a + # trailing comment for the preceding argument. Default is '#<' + explicit_trailing_pattern = '#<' + + # If a comment line starts with at least this many consecutive hash + # characters, then don't lstrip() them off. This allows for lazy hash rulers + # where the first hash char is not separated by space + hashruler_min_length = 10 + + # If true, then insert a space between the first hash char and remaining hash + # chars in a hash ruler, and normalize its length to fill the column + canonicalize_hashrulers = True + + # enable comment markup parsing and reflow + enable_markup = True + +# ---------------------------- +# Options affecting the linter +# ---------------------------- +with section("lint"): + + # a list of lint codes to disable + disabled_codes = [] + + # regular expression pattern describing valid function names + function_pattern = '[0-9a-z_]+' + + # regular expression pattern describing valid macro names + macro_pattern = '[0-9A-Z_]+' + + # regular expression pattern describing valid names for variables with global + # (cache) scope + global_var_pattern = '[A-Z][0-9A-Z_]+' + + # regular expression pattern describing valid names for variables with global + # scope (but internal semantic) + internal_var_pattern = '_[A-Z][0-9A-Z_]+' + + # regular expression pattern describing valid names for variables with local + # scope + local_var_pattern = '[a-z][a-z0-9_]+' + + # 
regular expression pattern describing valid names for private directory + # variables + private_var_pattern = '_[0-9a-z_]+' + + # regular expression pattern describing valid names for public directory + # variables + public_var_pattern = '[A-Z][0-9A-Z_]+' + + # regular expression pattern describing valid names for function/macro + # arguments and loop variables. + argument_var_pattern = '[a-z][a-z0-9_]+' + + # regular expression pattern describing valid names for keywords used in + # functions or macros + keyword_pattern = '[A-Z][0-9A-Z_]+' + + # In the heuristic for C0201, how many conditionals to match within a loop + # before considering the loop a parser. + max_conditionals_custom_parser = 2 + + # Require at least this many newlines between statements + min_statement_spacing = 1 + + # Require no more than this many newlines between statements + max_statement_spacing = 2 + max_returns = 6 + max_branches = 12 + max_arguments = 5 + max_localvars = 15 + max_statements = 50 + +# ------------------------------- +# Options affecting file encoding +# ------------------------------- +with section("encode"): + + # If true, emit the unicode byte-order mark (BOM) at the start of the file + emit_byteorder_mark = False + + # Specify the encoding of the input file. Defaults to utf-8 + input_encoding = 'utf-8' + + # Specify the encoding of the output file. Defaults to utf-8. Note that cmake + # only claims to support utf-8 so be careful when using anything else + output_encoding = 'utf-8' + +# ------------------------------------- +# Miscellaneous configurations options. +# ------------------------------------- +with section("misc"): + + # A dictionary containing any per-command configuration overrides. Currently + # only `command_case` is supported.
+ per_command = {} + From dc32df553b6c267ed5eef13b5561c6b7a7549a3d Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 17 Nov 2025 19:00:02 +0100 Subject: [PATCH 04/20] CMake: format a file with cmake-format --- tests/CMakeLists.txt | 170 +++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 87 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f7da815..fd9eafc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,126 +2,122 @@ find_program(GIT_LFS git-lfs) if(NOT GIT_LFS) - message(FATAL_ERROR - "Tests cannot be run because git-lfs is missing. " - "Disable testing or install git-lfs." + message(FATAL_ERROR "Tests cannot be run because git-lfs is missing. " + "Disable testing or install git-lfs." ) endif() - # List of active tets. Each test indicates a subdirectory. set(test-directories - f90init - c_mpi_init - cpp_mpi_init - c_int - c_int_elph_deph - c_int_file - f90int - f90int_file - f90_transmission - f90elph_deph - f90read_hs - f90Si2x2 - f90Si_nin - f90Si_nin_40pl - ) + f90init + c_mpi_init + cpp_mpi_init + c_int + c_int_elph_deph + c_int_file + f90int + f90int_file + f90_transmission + f90elph_deph + f90read_hs + f90Si2x2 + f90Si_nin + f90Si_nin_40pl +) if(WITH_TRANSPORT_GPU) list(APPEND test-directories testCUDA) list(APPEND test-directories testCUDA_decimation) endif() -# Define a function wich copies over the content of the tests. -# This is used to support out-of-source build, as the test -# directory contain input files which we don't want to specify. +# Define a function wich copies over the content of the tests. This is used to support out-of-source +# build, as the test directory contain input files which we don't want to specify. 
function(transfer_test_data testname) - if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") - add_custom_target(${testname}-data-transfer) - else() - add_custom_target( - ${testname}-data-transfer - COMMENT "Copying ${CMAKE_CURRENT_SOURCE_DIR} for out-of-source build." - COMMAND cp -f ${CMAKE_CURRENT_SOURCE_DIR}/* ${CMAKE_CURRENT_BINARY_DIR}/) - endif() - add_dependencies(${testname} ${testname}-data-transfer) + if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") + add_custom_target(${testname}-data-transfer) + else() + add_custom_target( + ${testname}-data-transfer + COMMENT "Copying ${CMAKE_CURRENT_SOURCE_DIR} for out-of-source build." + COMMAND cp -f ${CMAKE_CURRENT_SOURCE_DIR}/* ${CMAKE_CURRENT_BINARY_DIR}/ + ) + endif() + add_dependencies(${testname} ${testname}-data-transfer) endfunction() function(unzip_test_data testname) - find_program(XZ NAMES xz) - if(NOT XZ) - message(FATAL_ERROR "cannot run test '${testname}' because xz executable is missing") - endif() + find_program(XZ NAMES xz) + if(NOT XZ) + message(FATAL_ERROR "cannot run test '${testname}' because xz executable is missing") + endif() - if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") - add_custom_target(${testname}-data-unzip) - else() - add_custom_target( - ${testname}-data-unzip - COMMENT "unzip ${CMAKE_CURRENT_SOURCE_DIR} for out-of-source build." - COMMAND tar -xJf ${CMAKE_CURRENT_SOURCE_DIR}/*.tar.xz -C ${CMAKE_CURRENT_BINARY_DIR}) - endif() - add_dependencies(${testname}-data-unzip ${testname}-data-transfer) - add_dependencies(${testname} ${testname}-data-unzip) + if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") + add_custom_target(${testname}-data-unzip) + else() + add_custom_target( + ${testname}-data-unzip + COMMENT "unzip ${CMAKE_CURRENT_SOURCE_DIR} for out-of-source build." 
+ COMMAND tar -xJf ${CMAKE_CURRENT_SOURCE_DIR}/*.tar.xz -C ${CMAKE_CURRENT_BINARY_DIR} + ) + endif() + add_dependencies(${testname}-data-unzip ${testname}-data-transfer) + add_dependencies(${testname} ${testname}-data-unzip) endfunction() # A function to setup C++ tests. function(setup_cpp_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_CXX) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) - add_test( - NAME ${testname} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) + message(STATUS "add test: " ${testname} " " ${source} " " ${label}) + add_executable(${testname} ${source}) + transfer_test_data(${testname}) + target_link_libraries(${testname} MPI::MPI_CXX) + target_link_libraries(${testname} negf) + target_link_libraries(${testname} LAPACK::LAPACK) + target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) + add_test(NAME ${testname} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./${testname}) + set_property(TEST ${testname} PROPERTY LABELS ${label}) endfunction() # A function to setup C tests. 
function(setup_c_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source} ${ARGN}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_C) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) - add_test(${testname} ${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) + message(STATUS "add test: " ${testname} " " ${source} " " ${label}) + add_executable(${testname} ${source} ${ARGN}) + transfer_test_data(${testname}) + target_link_libraries(${testname} MPI::MPI_C) + target_link_libraries(${testname} negf) + target_link_libraries(${testname} LAPACK::LAPACK) + target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) + add_test(${testname} ${testname}) + set_property(TEST ${testname} PROPERTY LABELS ${label}) endfunction() # A function to setup F90 tests. 
function(setup_f90_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source} ${ARGN}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_Fortran) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) - add_test(${testname} ${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) + message(STATUS "add test: " ${testname} " " ${source} " " ${label}) + add_executable(${testname} ${source} ${ARGN}) + transfer_test_data(${testname}) + target_link_libraries(${testname} MPI::MPI_Fortran) + target_link_libraries(${testname} negf) + target_link_libraries(${testname} LAPACK::LAPACK) + target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) + add_test(${testname} ${testname}) + set_property(TEST ${testname} PROPERTY LABELS ${label}) endfunction() # A function to setup F90 tests with MPI. 
function(setup_f90_mpi_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source} ${ARGN}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_Fortran) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) - add_test( - NAME ${testname} - COMMAND env OMP_NUM_THREADS=1 ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ./${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) + message(STATUS "add test: " ${testname} " " ${source} " " ${label}) + add_executable(${testname} ${source} ${ARGN}) + transfer_test_data(${testname}) + target_link_libraries(${testname} MPI::MPI_Fortran) + target_link_libraries(${testname} negf) + target_link_libraries(${testname} LAPACK::LAPACK) + target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) + add_test(NAME ${testname} COMMAND env OMP_NUM_THREADS=1 ${MPIEXEC_EXECUTABLE} + ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ./${testname} + ) + set_property(TEST ${testname} PROPERTY LABELS ${label}) endfunction() - foreach(test-directory IN LISTS test-directories) - add_subdirectory(${test-directory}) + add_subdirectory(${test-directory}) endforeach() From ed8a8afa150454a29eec03218225cb4fce8064aa Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 18 Nov 2025 15:45:09 +0100 Subject: [PATCH 05/20] cmake-format: avoid breaking early --- .cmake-format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cmake-format.py b/.cmake-format.py index e688e0c..e80ab56 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -49,7 +49,7 @@ # If a positional argument group contains more than this many arguments, then # force it to a vertical layout. 
- max_pargs_hwrap = 6 + max_pargs_hwrap = 99 # If a cmdline positional group consumes more than this many lines without # nesting, then invalidate the layout (and nest) From 0ebd0912e376fd6db4d80a24a5ea63b8fe64bbd7 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 18 Nov 2025 16:49:07 +0100 Subject: [PATCH 06/20] Tests: revamp test creation Use a single function for adding tests. Features: * infer programming language of test from first source file * add keyword arguments for MPI, GPU-only tests * infer CMake test labels from keyword arguments * set the CMake `PROCESSORS` property of tests * add option for running all tests with mpiexec/mpirun * only show "add test" with log level DEBUG (`cmake --log-level=debug`) --- tests/CMakeLists.txt | 144 +++++++++++++---------- tests/c_int/CMakeLists.txt | 2 +- tests/c_int_elph_deph/CMakeLists.txt | 2 +- tests/c_int_file/CMakeLists.txt | 2 +- tests/c_mpi_init/CMakeLists.txt | 2 +- tests/cpp_mpi_init/CMakeLists.txt | 2 +- tests/f90Si2x2/CMakeLists.txt | 10 +- tests/f90Si_nin/CMakeLists.txt | 10 +- tests/f90Si_nin_40pl/CMakeLists.txt | 10 +- tests/f90_transmission/CMakeLists.txt | 2 +- tests/f90elph_deph/CMakeLists.txt | 2 +- tests/f90init/CMakeLists.txt | 2 +- tests/f90int/CMakeLists.txt | 2 +- tests/f90int_file/CMakeLists.txt | 2 +- tests/f90read_hs/CMakeLists.txt | 8 +- tests/testCUDA/CMakeLists.txt | 2 +- tests/testCUDA_decimation/CMakeLists.txt | 6 +- 17 files changed, 100 insertions(+), 110 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fd9eafc..577f04d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,9 @@ +# On some HPC systems (e.g., LUMI), front-end nodes do not feature GPUs or the front-end nodes may +# have an instruction set architecture different from compute nodes (e.g., on Fugaku with its Intel +# front-end and A64FX compute nodes). On these systems, GPU tests or all tests, respectively, have +# to be submitted with mpiexec/mpirun.
+option(LIBNEGF_TEST_WITH_MPIEXEC "Always launch tests with mpiexec/mpirun" OFF) + # Test input files may be large and are therefore managed by git-lfs. find_program(GIT_LFS git-lfs) @@ -9,20 +15,8 @@ endif() # List of active tets. Each test indicates a subdirectory. set(test-directories - f90init - c_mpi_init - cpp_mpi_init - c_int - c_int_elph_deph - c_int_file - f90int - f90int_file - f90_transmission - f90elph_deph - f90read_hs - f90Si2x2 - f90Si_nin - f90Si_nin_40pl + f90init c_mpi_init cpp_mpi_init c_int c_int_elph_deph c_int_file f90int f90int_file + f90_transmission f90elph_deph f90read_hs f90Si2x2 f90Si_nin f90Si_nin_40pl ) if(WITH_TRANSPORT_GPU) @@ -64,58 +58,88 @@ function(unzip_test_data testname) add_dependencies(${testname} ${testname}-data-unzip) endfunction() -# A function to setup C++ tests. -function(setup_cpp_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_CXX) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) - add_test(NAME ${testname} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) -endfunction() +function(setup_test testname source_main) + set(options GPUONLY MPI) + set(multi_value_args SOURCES LABELS) + cmake_parse_arguments(PARSE_ARGV 2 LIBNEGF_TEST "${options}" "" "${multi_value_args}") -# A function to setup C tests. 
-function(setup_c_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source} ${ARGN}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_C) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) - add_test(${testname} ${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) -endfunction() + if(LIBNEGF_TEST_UNPARSED_ARGUMENTS) + message(WARNING "could not parse the following arguments: ${LIBNEGF_TEST_UNPARSED_ARGUMENTS}") + endif() + if(LIBNEGF_TEST_KEYWORDS_MISSING_VALUES) + message( + WARNING + "the following keyword arguments are missing values: ${LIBNEGF_TEST_KEYWORDS_MISSING_VALUES}" + ) + endif() -# A function to setup F90 tests. -function(setup_f90_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source} ${ARGN}) - transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_Fortran) + list(PREPEND LIBNEGF_TEST_SOURCES ${source_main}) + if(LIBNEGF_TEST_GPUONLY) + list(APPEND LIBNEGF_TEST_LABELS gpu-only) + endif() + if(LIBNEGF_TEST_MPI) + list(APPEND LIBNEGF_TEST_LABELS mpi) + endif() + + # detect implementation language + if(source_main MATCHES "[.]c$") + set(LIBNEGF_TEST_LANG C) + elseif(source_main MATCHES "[.]cpp$") + set(LIBNEGF_TEST_LANG CXX) + elseif(source_main MATCHES "[.](f|F)90$") + set(LIBNEGF_TEST_LANG Fortran) + else() + message(SEND_ERROR "unknown file extension of test source ${source_main}") + endif() + + message(DEBUG "add test: ${testname} ${LIBNEGF_TEST_SOURCES} ${LIBNEGF_TEST_LABELS}") + + # set up test compilation + add_executable(${testname} ${LIBNEGF_TEST_SOURCES}) + + if(LIBNEGF_TEST_LANG STREQUAL C) + target_link_libraries(${testname} MPI::MPI_C) + target_include_directories(${testname} PRIVATE 
${CMAKE_BINARY_DIR}/src/api) + elseif(LIBNEGF_TEST_LANG STREQUAL CXX) + target_link_libraries(${testname} MPI::MPI_CXX) + target_include_directories(${testname} PRIVATE ${CMAKE_BINARY_DIR}/src/api) + elseif(LIBNEGF_TEST_LANG STREQUAL Fortran) + target_link_libraries(${testname} MPI::MPI_Fortran) + target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) + endif() target_link_libraries(${testname} negf) target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) - add_test(${testname} ${testname}) - set_property(TEST ${testname} PROPERTY LABELS ${label}) -endfunction() -# A function to setup F90 tests with MPI. -function(setup_f90_mpi_test testname label source) - message(STATUS "add test: " ${testname} " " ${source} " " ${label}) - add_executable(${testname} ${source} ${ARGN}) + # determine how to execute the test + if(${LIBNEGF_TEST_MPI}) + set(num_procs ${MPIEXEC_MAX_NUMPROCS}) + set(cmd + ${MPIEXEC_EXECUTABLE} + ${MPIEXEC_NUMPROC_FLAG} + ${num_procs} + ${MPIEXEC_PREFLAGS} + env + # When running on a user's PC, it is possible to thrash the machine by having one MPI task + # per core and one OpenMP thread per task and per core (the default). The restriction below + # aims to avoid this scenario. 
+ OMP_NUM_THREADS=1 + ./${testname} + ${MPIEXEC_POSTFLAGS} + ) + elseif(${LIBNEGF_TEST_WITH_MPIEXEC} AND NOT ${LIBNEGF_TEST_MPI}) + set(num_procs 1) + set(cmd ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${num_procs} ${MPIEXEC_PREFLAGS} + ./${testname} ${MPIEXEC_POSTFLAGS} + ) + else() + set(cmd ${testname}) + endif() + + add_test(NAME ${testname} COMMAND ${cmd}) + set_property(TEST ${testname} PROPERTY LABELS ${LIBNEGF_TEST_LABELS}) + set_property(TEST ${testname} PROPERTY PROCESSORS ${num_procs}) + transfer_test_data(${testname}) - target_link_libraries(${testname} MPI::MPI_Fortran) - target_link_libraries(${testname} negf) - target_link_libraries(${testname} LAPACK::LAPACK) - target_include_directories(${testname} PRIVATE ${BUILD_MOD_DIR}) - add_test(NAME ${testname} COMMAND env OMP_NUM_THREADS=1 ${MPIEXEC_EXECUTABLE} - ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ./${testname} - ) - set_property(TEST ${testname} PROPERTY LABELS ${label}) endfunction() foreach(test-directory IN LISTS test-directories) diff --git a/tests/c_int/CMakeLists.txt b/tests/c_int/CMakeLists.txt index 944afce..56f40f3 100644 --- a/tests/c_int/CMakeLists.txt +++ b/tests/c_int/CMakeLists.txt @@ -1 +1 @@ -setup_c_test(c_int "normal" hello.c) +setup_test(c_int hello.c) diff --git a/tests/c_int_elph_deph/CMakeLists.txt b/tests/c_int_elph_deph/CMakeLists.txt index c277efa..4a25b6a 100644 --- a/tests/c_int_elph_deph/CMakeLists.txt +++ b/tests/c_int_elph_deph/CMakeLists.txt @@ -1 +1 @@ -setup_c_test(c_int_elph_deph "normal" c_int_elph_deph.c) +setup_test(c_int_elph_deph c_int_elph_deph.c) diff --git a/tests/c_int_file/CMakeLists.txt b/tests/c_int_file/CMakeLists.txt index 97d0d72..bd2b052 100644 --- a/tests/c_int_file/CMakeLists.txt +++ b/tests/c_int_file/CMakeLists.txt @@ -1 +1 @@ -setup_c_test(c_int_file "normal" hello.c) +setup_test(c_int_file hello.c) diff --git a/tests/c_mpi_init/CMakeLists.txt b/tests/c_mpi_init/CMakeLists.txt index a81a8dd..c643a2c 100644 --- 
a/tests/c_mpi_init/CMakeLists.txt +++ b/tests/c_mpi_init/CMakeLists.txt @@ -1 +1 @@ -setup_c_test(c_mpi_init "normal" c_mpi_init.c) +setup_test(c_mpi_init c_mpi_init.c) diff --git a/tests/cpp_mpi_init/CMakeLists.txt b/tests/cpp_mpi_init/CMakeLists.txt index e349010..16250a9 100644 --- a/tests/cpp_mpi_init/CMakeLists.txt +++ b/tests/cpp_mpi_init/CMakeLists.txt @@ -1 +1 @@ -setup_cpp_test(cpp_mpi_init "normal" cpp_mpi_init.cpp) +setup_test(cpp_mpi_init cpp_mpi_init.cpp MPI) diff --git a/tests/f90Si2x2/CMakeLists.txt b/tests/f90Si2x2/CMakeLists.txt index 840a2ce..4036f3d 100644 --- a/tests/f90Si2x2/CMakeLists.txt +++ b/tests/f90Si2x2/CMakeLists.txt @@ -1,10 +1,2 @@ -set(sources - constants.F90 - readHS.F90 - matconv.F90 - test.F90) - -setup_f90_mpi_test(f90Si2x2 "normal" ${sources}) - +setup_test(f90Si2x2 test.F90 MPI SOURCES constants.F90 matconv.F90 readHS.F90) unzip_test_data(f90Si2x2 "hs.tar.xz") - diff --git a/tests/f90Si_nin/CMakeLists.txt b/tests/f90Si_nin/CMakeLists.txt index 7721c87..abfe11f 100644 --- a/tests/f90Si_nin/CMakeLists.txt +++ b/tests/f90Si_nin/CMakeLists.txt @@ -1,10 +1,2 @@ -set(sources - constants.F90 - readHS.F90 - matconv.F90 - test.F90) - -setup_f90_mpi_test(f90Si_nin "long" ${sources}) - +setup_test(f90Si_nin test.F90 MPI SOURCES constants.F90 matconv.F90 readHS.F90 LABELS long) unzip_test_data(f90Si_nin "hs.tar.xz") - diff --git a/tests/f90Si_nin_40pl/CMakeLists.txt b/tests/f90Si_nin_40pl/CMakeLists.txt index a1643d9..0852c8c 100644 --- a/tests/f90Si_nin_40pl/CMakeLists.txt +++ b/tests/f90Si_nin_40pl/CMakeLists.txt @@ -1,10 +1,2 @@ -set(sources - constants.F90 - readHS.F90 - matconv.F90 - test.F90) - -setup_f90_mpi_test(f90Si_nin_40pl "long" ${sources}) - +setup_test(f90Si_nin_40pl test.F90 MPI SOURCES constants.F90 matconv.F90 readHS.F90 LABELS long) unzip_test_data(f90Si_nin_40pl "hs.tar.xz") - diff --git a/tests/f90_transmission/CMakeLists.txt b/tests/f90_transmission/CMakeLists.txt index ff77843..379acd5 100644 --- 
a/tests/f90_transmission/CMakeLists.txt +++ b/tests/f90_transmission/CMakeLists.txt @@ -1 +1 @@ -setup_f90_test(f90_transmission "normal" hello.F90) +setup_test(f90_transmission hello.F90) diff --git a/tests/f90elph_deph/CMakeLists.txt b/tests/f90elph_deph/CMakeLists.txt index 3be2bbe..27a405d 100644 --- a/tests/f90elph_deph/CMakeLists.txt +++ b/tests/f90elph_deph/CMakeLists.txt @@ -1 +1 @@ -setup_f90_test(f90elph_deph "normal" hello.F90) +setup_test(f90elph_deph hello.F90) diff --git a/tests/f90init/CMakeLists.txt b/tests/f90init/CMakeLists.txt index e142194..928750f 100644 --- a/tests/f90init/CMakeLists.txt +++ b/tests/f90init/CMakeLists.txt @@ -1 +1 @@ -setup_f90_test(f90init "normal" f90init.F90) +setup_test(f90init f90init.F90) diff --git a/tests/f90int/CMakeLists.txt b/tests/f90int/CMakeLists.txt index 4012810..4b9b66f 100644 --- a/tests/f90int/CMakeLists.txt +++ b/tests/f90int/CMakeLists.txt @@ -1 +1 @@ -setup_f90_test(f90int "normal" hello.F90) +setup_test(f90int hello.F90) diff --git a/tests/f90int_file/CMakeLists.txt b/tests/f90int_file/CMakeLists.txt index 714a6da..c10406b 100644 --- a/tests/f90int_file/CMakeLists.txt +++ b/tests/f90int_file/CMakeLists.txt @@ -1 +1 @@ -setup_f90_test(f90int_file "normal" hello.F90) +setup_test(f90int_file hello.F90) diff --git a/tests/f90read_hs/CMakeLists.txt b/tests/f90read_hs/CMakeLists.txt index 888f10a..408eff3 100644 --- a/tests/f90read_hs/CMakeLists.txt +++ b/tests/f90read_hs/CMakeLists.txt @@ -1,7 +1 @@ -set(sources - constants.F90 - readHS.F90 - matconv.F90 - test.F90) - -setup_f90_test(f90read_hs "normal unit" ${sources}) +setup_test(f90read_hs test.F90 SOURCES constants.F90 matconv.F90 readHS.F90 LABELS unit) diff --git a/tests/testCUDA/CMakeLists.txt b/tests/testCUDA/CMakeLists.txt index 5c8bfce..0811443 100644 --- a/tests/testCUDA/CMakeLists.txt +++ b/tests/testCUDA/CMakeLists.txt @@ -1 +1 @@ -setup_f90_test(testCUDA "normal cuda" test.f90) +setup_test(testCUDA test.f90 GPUONLY) diff --git 
a/tests/testCUDA_decimation/CMakeLists.txt b/tests/testCUDA_decimation/CMakeLists.txt index 5e54259..b4292e8 100644 --- a/tests/testCUDA_decimation/CMakeLists.txt +++ b/tests/testCUDA_decimation/CMakeLists.txt @@ -1,5 +1 @@ -set(sources - main.f90 - random.f90) - -setup_f90_test(testCUDA_decimation "normal cuda" ${sources}) +setup_test(testCUDA_decimation main.f90 GPUONLY SOURCES random.f90) From 4f46d609520a8223b49e0bc5d2d19b94a62758cb Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 18 Nov 2025 17:10:53 +0100 Subject: [PATCH 07/20] CI: execute tests not labelled 'long' --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9aa7e60..def1fd3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -47,4 +47,4 @@ test: stage: test script: - cd "$BUILD_DIR" - - ctest -L normal --output-on-failure + - ctest -LE long --output-on-failure From fe160f8b5e49922071dd2e732dca9765afd72a3e Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 18 Nov 2025 17:11:11 +0100 Subject: [PATCH 08/20] README: explain how to submit tests to batch scheduler --- README.adoc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.adoc b/README.adoc index 49369ad..6b6a2d7 100644 --- a/README.adoc +++ b/README.adoc @@ -37,6 +37,28 @@ Running the tests additionally requires git-lfs for downloading test inputs. The extension of test input files is `.dat`. +== Running the Tests on a Supercomputer + +On some HPC systems (e.g., LUMI), front-end nodes do not feature GPUs or the front-end nodes may +have an instruction set architecture different from compute nodes (e.g., on Fugaku with its Intel +front-end and Arm64FX compute nodes). On these systems, it is best to submit all tests to the batch scheduler. + +This can be achieved with the following steps. First, determine the account name and a queue (or _partition_) for job submission. Next, select a time limit. 
Tests that do not possess the label `long` run quickly even on personal computers. Therefore, we suggest a time limit of at most five minutes (this is per task). Finally, determine the number of MPI tasks within the job. Our recommendation is to run on at most one node with one job per NUMA domain. + +CAUTION: Avoid large numbers of MPI tasks (e.g., by launching one task per virtual CPU core). The tests may not scale very well. + +With the values above in mind, set `MPIEXEC_EXECUTABLE` to the absolute path to the batch scheduler executable and have `MPIEXEC_PREFLAGS` contain all the batch scheduler arguments. Here is an example for Slurm: +```sh +cmake \ + -DLIBNEGF_TEST_WITH_MPIEXEC=ON \ + -DMPIEXEC_EXECUTABLE="$(which srun)" \ + -DMPIEXEC_NUMPROC_FLAG='--ntasks' \ + -DMPIEXEC_MAX_NUMPROCS=4 \ + -DMPIEXEC_PREFLAGS='--account=jsc1 --partition=dev --nodes=1 --time=5m' \ + ... +``` + + == Generating libNEGF Input From 3230444200bf8c7ca0a45def8a43c5c096a5f396 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 18 Nov 2025 17:14:57 +0100 Subject: [PATCH 09/20] cmake-format: add custom parser for setup_test() Add a custom parser and revert `max_pargs_hwrap` to its default value. --- .cmake-format.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.cmake-format.py b/.cmake-format.py index e80ab56..6f2c0fe 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -4,8 +4,10 @@ with section("parse"): # Specify structure for custom cmake functions - additional_commands = { 'foo': { 'flags': ['BAR', 'BAZ'], - 'kwargs': {'DEPENDS': '*', 'HEADERS': '*', 'SOURCES': '*'}}} + additional_commands = { + 'setup_test': { 'kwargs': {'GPUONLY': '0', 'LABELS': '+', 'MPI': '0', 'SOURCES': '+'}, + 'pargs': {'flags': ['2'], 'nargs': '0'}}, + } # Override configurations per-command where available override_spec = {} @@ -49,7 +51,7 @@ # If a positional argument group contains more than this many arguments, then # force it to a vertical layout. 
- max_pargs_hwrap = 99 + max_pargs_hwrap = 6 # If a cmdline positional group consumes more than this many lines without # nesting, then invalidate the layout (and nest) From 307a623b2a9f2cea7eab26f292da8bfb81d90ddd Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 18 Nov 2025 17:16:35 +0100 Subject: [PATCH 10/20] Tests: apply latest cmake-format config change --- tests/CMakeLists.txt | 16 ++++++++++++++-- tests/f90Si2x2/CMakeLists.txt | 6 +++++- tests/f90Si_nin/CMakeLists.txt | 7 ++++++- tests/f90Si_nin_40pl/CMakeLists.txt | 7 ++++++- tests/f90read_hs/CMakeLists.txt | 6 +++++- tests/testCUDA_decimation/CMakeLists.txt | 6 +++++- 6 files changed, 41 insertions(+), 7 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 577f04d..75142f6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -15,8 +15,20 @@ endif() # List of active tets. Each test indicates a subdirectory. set(test-directories - f90init c_mpi_init cpp_mpi_init c_int c_int_elph_deph c_int_file f90int f90int_file - f90_transmission f90elph_deph f90read_hs f90Si2x2 f90Si_nin f90Si_nin_40pl + f90init + c_mpi_init + cpp_mpi_init + c_int + c_int_elph_deph + c_int_file + f90int + f90int_file + f90_transmission + f90elph_deph + f90read_hs + f90Si2x2 + f90Si_nin + f90Si_nin_40pl ) if(WITH_TRANSPORT_GPU) diff --git a/tests/f90Si2x2/CMakeLists.txt b/tests/f90Si2x2/CMakeLists.txt index 4036f3d..80d7a92 100644 --- a/tests/f90Si2x2/CMakeLists.txt +++ b/tests/f90Si2x2/CMakeLists.txt @@ -1,2 +1,6 @@ -setup_test(f90Si2x2 test.F90 MPI SOURCES constants.F90 matconv.F90 readHS.F90) +setup_test( + f90Si2x2 test.F90 + MPI + SOURCES constants.F90 matconv.F90 readHS.F90 +) unzip_test_data(f90Si2x2 "hs.tar.xz") diff --git a/tests/f90Si_nin/CMakeLists.txt b/tests/f90Si_nin/CMakeLists.txt index abfe11f..69b6034 100644 --- a/tests/f90Si_nin/CMakeLists.txt +++ b/tests/f90Si_nin/CMakeLists.txt @@ -1,2 +1,7 @@ -setup_test(f90Si_nin test.F90 MPI SOURCES constants.F90 matconv.F90 readHS.F90 
LABELS long) +setup_test( + f90Si_nin test.F90 + MPI + SOURCES constants.F90 matconv.F90 readHS.F90 + LABELS long +) unzip_test_data(f90Si_nin "hs.tar.xz") diff --git a/tests/f90Si_nin_40pl/CMakeLists.txt b/tests/f90Si_nin_40pl/CMakeLists.txt index 0852c8c..5a79e65 100644 --- a/tests/f90Si_nin_40pl/CMakeLists.txt +++ b/tests/f90Si_nin_40pl/CMakeLists.txt @@ -1,2 +1,7 @@ -setup_test(f90Si_nin_40pl test.F90 MPI SOURCES constants.F90 matconv.F90 readHS.F90 LABELS long) +setup_test( + f90Si_nin_40pl test.F90 + MPI + SOURCES constants.F90 matconv.F90 readHS.F90 + LABELS long +) unzip_test_data(f90Si_nin_40pl "hs.tar.xz") diff --git a/tests/f90read_hs/CMakeLists.txt b/tests/f90read_hs/CMakeLists.txt index 408eff3..3d37ea2 100644 --- a/tests/f90read_hs/CMakeLists.txt +++ b/tests/f90read_hs/CMakeLists.txt @@ -1 +1,5 @@ -setup_test(f90read_hs test.F90 SOURCES constants.F90 matconv.F90 readHS.F90 LABELS unit) +setup_test( + f90read_hs test.F90 + SOURCES constants.F90 matconv.F90 readHS.F90 + LABELS unit +) diff --git a/tests/testCUDA_decimation/CMakeLists.txt b/tests/testCUDA_decimation/CMakeLists.txt index b4292e8..5d1c444 100644 --- a/tests/testCUDA_decimation/CMakeLists.txt +++ b/tests/testCUDA_decimation/CMakeLists.txt @@ -1 +1,5 @@ -setup_test(testCUDA_decimation main.f90 GPUONLY SOURCES random.f90) +setup_test( + testCUDA_decimation main.f90 + GPUONLY + SOURCES random.f90 +) From a5c5e2f1dc1d36badbaec288557aeed28b4de046 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 24 Nov 2025 16:02:24 +0100 Subject: [PATCH 11/20] README: fix instructions tests w/ batch scheduler --- README.adoc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.adoc b/README.adoc index 6b6a2d7..61e9d28 100644 --- a/README.adoc +++ b/README.adoc @@ -47,17 +47,23 @@ This can be achieved with the following steps. First, determine the account name CAUTION: Avoid large numbers of MPI tasks (e.g., by launching one task per virtual CPU core). 
The tests may not scale very well. -With the values above in mind, set `MPIEXEC_EXECUTABLE` to the absolute path to the batch scheduler executable and have `MPIEXEC_PREFLAGS` contain all the batch scheduler arguments. Here is an example for Slurm: +With the values above in mind, set `MPIEXEC_EXECUTABLE` to the absolute path to the batch scheduler executable and have `MPIEXEC_PREFLAGS` contain all the batch scheduler arguments as a CMake list; in a CMake list, list items are separated by a semicolon. Here is an example for Slurm: ```sh cmake \ -DLIBNEGF_TEST_WITH_MPIEXEC=ON \ -DMPIEXEC_EXECUTABLE="$(which srun)" \ -DMPIEXEC_NUMPROC_FLAG='--ntasks' \ -DMPIEXEC_MAX_NUMPROCS=4 \ - -DMPIEXEC_PREFLAGS='--account=jsc1 --partition=dev --nodes=1 --time=5m' \ + -DMPIEXEC_PREFLAGS='--account=mat4energy;--partition=develbooster;--nodes=1;--gpus-per-task=1;--time=1' \ ... ``` +Note the following: + +* A time limit of one minutes is sufficient for tests without `long` tag on JUWELS Booster and LUMI. +* The time limit of the job and the time limit of test CMake test submitting the job are two different things. +* Redirecting the output (e.g., with Slurm with `--output=stdout.txt`) to files breaks the mpifx tests because these tests examine the standard output instead of the exit status. + == Generating libNEGF Input From 428339a883d1a7332ea1f1dc19fe564f3d7648eb Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 24 Nov 2025 16:25:09 +0100 Subject: [PATCH 12/20] README: use proper AsciiDoc --- README.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.adoc b/README.adoc index 61e9d28..063b6a5 100644 --- a/README.adoc +++ b/README.adoc @@ -48,7 +48,9 @@ This can be achieved with the following steps. First, determine the account name CAUTION: Avoid large numbers of MPI tasks (e.g., by launching one task per virtual CPU core). The tests may not scale very well. 
With the values above in mind, set `MPIEXEC_EXECUTABLE` to the absolute path to the batch scheduler executable and have `MPIEXEC_PREFLAGS` contain all the batch scheduler arguments as a CMake list; in a CMake list, list items are separated by a semicolon. Here is an example for Slurm: -```sh + +[source,shell,linenums] +---- cmake \ -DLIBNEGF_TEST_WITH_MPIEXEC=ON \ -DMPIEXEC_EXECUTABLE="$(which srun)" \ @@ -56,7 +58,7 @@ cmake \ -DMPIEXEC_MAX_NUMPROCS=4 \ -DMPIEXEC_PREFLAGS='--account=mat4energy;--partition=develbooster;--nodes=1;--gpus-per-task=1;--time=1' \ ... -``` +---- Note the following: From b9d036a2c82dec30ea00e0b581956947000c7bf9 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 24 Nov 2025 16:27:44 +0100 Subject: [PATCH 13/20] README: improve wording --- README.adoc | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.adoc b/README.adoc index 063b6a5..1156c60 100644 --- a/README.adoc +++ b/README.adoc @@ -39,15 +39,20 @@ The extension of test input files is `.dat`. == Running the Tests on a Supercomputer -On some HPC systems (e.g., LUMI), front-end nodes do not feature GPUs or the front-end nodes may -have an instruction set architecture different from compute nodes (e.g., on Fugaku with its Intel -front-end and Arm64FX compute nodes). On these systems, it is best to submit all tests to the batch scheduler. +On some HPC systems, front-end nodes do not feature GPUs (e.g., on LUMI) or the +front-end nodes may have an instruction set architecture different from compute +nodes (e.g., on Fugaku with its Intel front-end and A64FX compute nodes). On +these systems, it is best to submit all tests to the batch scheduler. This can be achieved with the following steps. First, determine the account name and a queue (or _partition_) for job submission. Next, select a time limit. Tests that do not possess the label `long` run quickly even on personal computers.
Therefore, we suggest a time limit of at most five minutes (this is per task). Finally, determine the number of MPI tasks within the job. Our recommendation is to run on at most one node with one job per NUMA domain. CAUTION: Avoid large numbers of MPI tasks (e.g., by launching one task per virtual CPU core). The tests may not scale very well. -With the values above in mind, set `MPIEXEC_EXECUTABLE` to the absolute path to the batch scheduler executable and have `MPIEXEC_PREFLAGS` contain all the batch scheduler arguments as a CMake list; in a CMake list, list items are separated by a semicolon. Here is an example for Slurm: +With the values above in mind, enable `LIBNEGF_TEST_WITH_MPIEXEC`, set +`MPIEXEC_EXECUTABLE` to the absolute path to the batch scheduler executable, and +have `MPIEXEC_PREFLAGS` contain all the batch scheduler arguments as a CMake +list; in a CMake list, list items are separated by a semicolon. Here is an +example for Slurm: [source,shell,linenums] ---- @@ -64,7 +69,7 @@ Note the following: * A time limit of one minutes is sufficient for tests without `long` tag on JUWELS Booster and LUMI. * The time limit of the job and the time limit of test CMake test submitting the job are two different things. -* Redirecting the output (e.g., with Slurm with `--output=stdout.txt`) to files breaks the mpifx tests because these tests examine the standard output instead of the exit status. +* Redirecting the output to files (e.g., with Slurm with `--output=stdout.txt`) breaks the mpifx tests because these tests examine the standard output instead of the exit status. From f5fb2ffaa28010f3219c9644ad6eb28f2732e6ff Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 25 Nov 2025 15:54:17 +0100 Subject: [PATCH 14/20] CMake: avoid redundant operation From the CMake 3.18.6 `add_library()` documentation: If no type is given explicitly the type is STATIC or SHARED based on whether the current value of the variable BUILD_SHARED_LIBS is ON. 
For SHARED and MODULE libraries the POSITION_INDEPENDENT_CODE target property is set to ON automatically. These properties still hold according to the CMake 4.2.0 documentation. --- ext_system/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ext_system/CMakeLists.txt b/ext_system/CMakeLists.txt index adda84a..581f8f2 100644 --- a/ext_system/CMakeLists.txt +++ b/ext_system/CMakeLists.txt @@ -7,7 +7,6 @@ add_library(syscalls_objlib OBJECT ${sources}) set(BUILD_MOD_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) set_target_properties(syscalls_objlib PROPERTIES Fortran_MODULE_DIRECTORY ${BUILD_MOD_DIR}) -set_target_properties(syscalls_objlib PROPERTIES POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) target_include_directories(syscalls_objlib PUBLIC $ From 1f9ab6e2ce7730d21aa96e0cc4c04ecc8073f6cf Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 25 Nov 2025 16:06:58 +0100 Subject: [PATCH 15/20] CMake: fix CMP0177 warnings --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index da5accb..c96e04a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,9 @@ cmake_minimum_required(VERSION 3.18) +if(POLICY CMP0177) + cmake_policy(SET CMP0177 NEW) +endif() + list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake) include(LibNegfUtils) libnegf_load_build_settings() From 33dd1ebf97bef41ef0d51717d3f38c7bb452afe1 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 25 Nov 2025 16:23:47 +0100 Subject: [PATCH 16/20] CMake: enable test for ext_system --- tests/CMakeLists.txt | 5 +++++ ext_system/test.f90 => tests/ext_system_test.f90 | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) rename ext_system/test.f90 => tests/ext_system_test.f90 (66%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f7da815..2bbae67 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -125,3 +125,8 @@ endfunction() foreach(test-directory IN LISTS
test-directories) add_subdirectory(${test-directory}) endforeach() + + +add_executable(ext_system_test ext_system_test.f90) +target_link_libraries(ext_system_test PRIVATE syscalls_objlib) +add_test(NAME ext_system_test COMMAND ext_system_test) diff --git a/ext_system/test.f90 b/tests/ext_system_test.f90 similarity index 66% rename from ext_system/test.f90 rename to tests/ext_system_test.f90 index 1f30d2e..034c104 100644 --- a/ext_system/test.f90 +++ b/tests/ext_system_test.f90 @@ -4,7 +4,7 @@ program test character(100) :: folder - folder = "testfolder" + folder = "ext_system_testfolder" call create_directory(trim(folder)) @@ -12,9 +12,9 @@ program test write(101, *) 'test', 101 close(101) - !call remove_file(trim(folder)//'/afile') + call remove_file(trim(folder)//'/afile') - !call remove_directory(folder) + call remove_directory(trim(folder)) end program test From e692a11c3aa7d946998977e951df34f1c3b4dea6 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 25 Nov 2025 17:05:49 +0100 Subject: [PATCH 17/20] Revert "CMake: avoid redundant operation" This reverts commit f5fb2ffaa28010f3219c9644ad6eb28f2732e6ff. The statements in the commit to be reverted do not hold for object libraries. In practice, the reverted commit only caused problems in the Rocky Linux 9 CI container (this Linux distribution does not enable position independent executable [PIE for short] by default).
--- ext_system/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ext_system/CMakeLists.txt b/ext_system/CMakeLists.txt index 581f8f2..adda84a 100644 --- a/ext_system/CMakeLists.txt +++ b/ext_system/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(syscalls_objlib OBJECT ${sources}) set(BUILD_MOD_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) set_target_properties(syscalls_objlib PROPERTIES Fortran_MODULE_DIRECTORY ${BUILD_MOD_DIR}) +set_target_properties(syscalls_objlib PROPERTIES POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) target_include_directories(syscalls_objlib PUBLIC $ From 1f2992193ac29833a990f7e2d70dc0209fb6731f Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Mon, 1 Dec 2025 16:55:40 +0100 Subject: [PATCH 18/20] README: use ctest in reserved allocation --- README.adoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.adoc b/README.adoc index 1156c60..5e79894 100644 --- a/README.adoc +++ b/README.adoc @@ -64,6 +64,8 @@ cmake \ -DMPIEXEC_PREFLAGS='--account=mat4energy;--partition=develbooster;--nodes=1;--gpus-per-task=1;--time=1' \ ... ---- +For faster execution, the tests can be run in an existing job allocation, e.g., +by calling `salloc` (for Slurm) or by using a reservation. Within the allocation, just call `ctest`. Note the following: From 9931a955099c9e9d001ef108c247ae161db85646 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 2 Dec 2025 13:45:22 +0100 Subject: [PATCH 19/20] CMake: avoid `env` as job name Avoid `env` as job name when `MPIEXEC_EXECUTABLE` submits tests to a batch scheduler. 
--- tests/CMakeLists.txt | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 75142f6..aac5877 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -126,15 +126,18 @@ function(setup_test testname source_main) if(${LIBNEGF_TEST_MPI}) set(num_procs ${MPIEXEC_MAX_NUMPROCS}) set(cmd - ${MPIEXEC_EXECUTABLE} - ${MPIEXEC_NUMPROC_FLAG} - ${num_procs} - ${MPIEXEC_PREFLAGS} - env # When running on a user's PC, it is possible to thrash the machine by having one MPI task # per core and one OpenMP thread per task and per core (the default). The restriction below # aims to avoid this scenario. + # + # env is wrapping the call to mpiexec/mpirun to avoid the job name `env` when + # ${MPIEXEC_EXECUTABLE} is a batch scheduler. + env OMP_NUM_THREADS=1 + ${MPIEXEC_EXECUTABLE} + ${MPIEXEC_NUMPROC_FLAG} + ${num_procs} + ${MPIEXEC_PREFLAGS} ./${testname} ${MPIEXEC_POSTFLAGS} ) From cbeebde336b7d69862cb65711ce26190d156917a Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 2 Dec 2025 13:52:16 +0100 Subject: [PATCH 20/20] CMake: elaborate on env/mpiexec interaction --- tests/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index aac5877..41bc9a2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -130,8 +130,12 @@ function(setup_test testname source_main) # per core and one OpenMP thread per task and per core (the default). The restriction below # aims to avoid this scenario. # - # env is wrapping the call to mpiexec/mpirun to avoid the job name `env` when - # ${MPIEXEC_EXECUTABLE} is a batch scheduler. + # `env` is wrapping the call to mpiexec/mpirun to avoid the job name `env` when + # ${MPIEXEC_EXECUTABLE} is a batch scheduler. 
This works with Slurm but some batch + # schedulers (e.g., OAR) filter the set of environment variables (the OAR documentation [1] + # even mentions `OMP_NUM_THREADS` explicitly). + # + # [1] https://oar.imag.fr/wiki:passing_environment_variables_to_openmpi_nodes env OMP_NUM_THREADS=1 ${MPIEXEC_EXECUTABLE}