Skip to content
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ endif()
# Set compiler flags
# Debug builds get the warning set; Release builds get -O2 with generic tuning.
# string(APPEND) extends any flags already present in the variables, so
# user-provided values are kept and the new options are added after them.
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -Wall -Wextra -pedantic -Wno-unused -Wno-psabi -Wfloat-conversion")
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -mtune=generic -Wno-psabi")

# Raise the single-function inlining limit for Release builds, but only when
# compiling with GCC. CMAKE_CXX_COMPILER_ID is used for the check because
# CMAKE_COMPILER_IS_GNUCXX is deprecated (since CMake 3.24).
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " --param max-inline-insns-single=1500")
endif()

# Set coverage compiler flags - must come before any targets are defined
if(GMGPOLAR_ENABLE_COVERAGE)
Expand Down
68 changes: 39 additions & 29 deletions include/common/global_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,34 +83,44 @@ enum class BetaCoeff
/* Mumps - Constant Definitions */
/* ---------------------------- */
#ifdef GMGPOLAR_USE_MUMPS
/* Mumps macro s.t. indices match documentation */
#define ICNTL(I) icntl[(I) - 1]
#define CNTL(I) cntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]

#define USE_COMM_WORLD -987654
#define PAR_NOT_PARALLEL 0
#define PAR_PARALLEL 1

#define JOB_INIT -1
#define JOB_END -2
#define JOB_REMOVE_SAVED_DATA -3
#define JOB_FREE_INTERNAL_DATA -4
#define JOB_SUPPRESS_OOC_FILES -200

#define JOB_ANALYSIS_PHASE 1
#define JOB_FACTORIZATION_PHASE 2
#define JOB_COMPUTE_SOLUTION 3
#define JOB_ANALYSIS_AND_FACTORIZATION 4
#define JOB_FACTORIZATION_AND_SOLUTION 5
#define JOB_ANALYSIS_FACTORIZATION_SOLUTION 6
#define JOB_SAVE_INTERNAL_DATA 7
#define JOB_RESTORE_INTERNAL_DATA 8
#define JOB_DISTRIBUTE_RHS 9

#define SYM_UNSYMMETRIC 0
#define SYM_POSITIVE_DEFINITE 1
#define SYM_GENERAL_SYMMETRIC 2
#include "dmumps_c.h"
/* MUMPS inline accessor functions, so that indices match the MUMPS documentation */
/* Accessor for the `icntl` array of a MUMPS instance.
 * `I` is the index as written in the MUMPS documentation (ICNTL(I)); it is
 * shifted down by one to address the underlying array.
 * Returns a mutable reference, so the result can be read or assigned to. */
inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I)
{
    const int array_index = I - 1;
    return mumps_solver.icntl[array_index];
}
/* Accessor for the `cntl` array of a MUMPS instance.
 * `I` is the index as written in the MUMPS documentation (CNTL(I)); it is
 * shifted down by one to address the underlying array.
 * Returns a mutable reference, so the result can be read or assigned to. */
inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I)
{
    const int array_index = I - 1;
    return mumps_solver.cntl[array_index];
}
/* Accessor for the `infog` array of a MUMPS instance.
 * `I` is the index as written in the MUMPS documentation (INFOG(I)); it is
 * shifted down by one to address the underlying array.
 * Returns a mutable reference to the selected entry. */
inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I)
{
    const int array_index = I - 1;
    return mumps_solver.infog[array_index];
}

/* Named integer constants for configuring a MUMPS instance. */

/* Communicator value; the solver setup assigns it to the `comm_fortran` field. */
constexpr int USE_COMM_WORLD = -987654;
/* NOTE(review): presumably the two values for the MUMPS `par` field
 * (whether the host takes part in the computation) - confirm against the MUMPS manual. */
constexpr int PAR_NOT_PARALLEL = 0;
constexpr int PAR_PARALLEL = 1;

/* Values for the `job` field: instance management (negative values). */
constexpr int JOB_INIT = -1;
constexpr int JOB_END = -2;
constexpr int JOB_REMOVE_SAVED_DATA = -3;
constexpr int JOB_FREE_INTERNAL_DATA = -4;
constexpr int JOB_SUPPRESS_OOC_FILES = -200;

/* Values for the `job` field: analysis / factorization / solution phases and
 * their combinations, plus save/restore and right-hand-side distribution. */
constexpr int JOB_ANALYSIS_PHASE = 1;
constexpr int JOB_FACTORIZATION_PHASE = 2;
constexpr int JOB_COMPUTE_SOLUTION = 3;
constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4;
constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5;
constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6;
constexpr int JOB_SAVE_INTERNAL_DATA = 7;
constexpr int JOB_RESTORE_INTERNAL_DATA = 8;
constexpr int JOB_DISTRIBUTE_RHS = 9;

/* Values for the `sym` field, which describes the symmetry of the matrix. */
constexpr int SYM_UNSYMMETRIC = 0;
constexpr int SYM_POSITIVE_DEFINITE = 1;
constexpr int SYM_GENERAL_SYMMETRIC = 2;
#endif

// --------------------------------------- //
Expand Down Expand Up @@ -148,4 +158,4 @@ enum class BetaCoeff
#define LIKWID_START(marker)
#define LIKWID_STOP(marker)
#define LIKWID_CLOSE()
#endif
#endif
98 changes: 49 additions & 49 deletions src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,68 +23,68 @@ void DirectSolver_COO_MUMPS_Give::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_so
mumps_solver.comm_fortran = USE_COMM_WORLD;
dmumps_c(&mumps_solver);

mumps_solver.ICNTL(1) = 0; // Output stream for error messages.
mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process.
mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host
mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages.
mumps_solver.ICNTL(5) = 0; // Controls the matrix input format
mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix
mumps_solver.ICNTL(7) =
ICNTL(mumps_solver, 1) = 0; // Output stream for error messages.
ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process.
ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host
ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages.
ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format
ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix
ICNTL(mumps_solver, 7) =
5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis
mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy
mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T
mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution
mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved
mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used
mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node
mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space
ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy
ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T
ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution
ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved
ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used
ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node
ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space
(solver_matrix.is_symmetric() ? 5 : 20);
mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format
mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads
ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format
ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads
// ICNTL(17) Doesn't exist
mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix
mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix
mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides
mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors.
mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve.
mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can
ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix
ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix
ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides
ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors.
ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve.
ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can
// allocate per working process
mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”.
mumps_solver.ICNTL(25) =
ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”.
ICNTL(mumps_solver, 25) =
0; // Allows the computation of a solution of a deficient matrix and also of a null space basis
mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed
mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides.
mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed
mumps_solver.ICNTL(29) =
ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed
ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides.
ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed
ICNTL(mumps_solver, 29) =
0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation.
mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix
mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization.
mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization
mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix.
mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3
mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature
mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant
mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks
mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors
mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks
ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix
ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization.
ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization
ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix.
ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3
ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature
ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant
ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks
ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors
ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks
// ICNTL(40-47) Don't exist
mumps_solver.ICNTL(48) = 0; // Multithreading with tree parallelism
mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase
ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism
ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase
// ICNTL(50-55) Don't exist
mumps_solver.ICNTL(56) =
ICNTL(mumps_solver, 56) =
0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method
// ICNTL(57) Doesn't exist
mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization
ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization
// ICNTL(59-60) Don't exist

mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting
mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement
mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows
mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting
mumps_solver.CNTL(5) =
CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting
CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement
CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows
CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting
CNTL(mumps_solver, 5) =
0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active
// CNTL(6) Doesn't exist
mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression
CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression
// CNTL(8-15) Don't exist

mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION;
Expand All @@ -96,7 +96,7 @@ void DirectSolver_COO_MUMPS_Give::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_so
mumps_solver.a = solver_matrix.values_data();
dmumps_c(&mumps_solver);

if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) {
if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) {
std::cout
<< "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase."
<< std::endl;
Expand Down
Loading