From 9ebff6e0c62bd98d22b959d55ba7c3f4c6628fb5 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Thu, 15 Jan 2026 16:22:51 +0100 Subject: [PATCH 01/10] Prefer constexpr for MUMPS constants --- include/common/global_definitions.h | 48 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/include/common/global_definitions.h b/include/common/global_definitions.h index bef73fe9..01628bde 100644 --- a/include/common/global_definitions.h +++ b/include/common/global_definitions.h @@ -88,29 +88,29 @@ enum class BetaCoeff #define CNTL(I) cntl[(I) - 1] #define INFOG(I) infog[(I) - 1] - #define USE_COMM_WORLD -987654 - #define PAR_NOT_PARALLEL 0 - #define PAR_PARALLEL 1 - - #define JOB_INIT -1 - #define JOB_END -2 - #define JOB_REMOVE_SAVED_DATA -3 - #define JOB_FREE_INTERNAL_DATA -4 - #define JOB_SUPPRESS_OOC_FILES -200 - - #define JOB_ANALYSIS_PHASE 1 - #define JOB_FACTORIZATION_PHASE 2 - #define JOB_COMPUTE_SOLUTION 3 - #define JOB_ANALYSIS_AND_FACTORIZATION 4 - #define JOB_FACTORIZATION_AND_SOLUTION 5 - #define JOB_ANALYSIS_FACTORIZATION_SOLUTION 6 - #define JOB_SAVE_INTERNAL_DATA 7 - #define JOB_RESTORE_INTERNAL_DATA 8 - #define JOB_DISTRIBUTE_RHS 9 - - #define SYM_UNSYMMETRIC 0 - #define SYM_POSITIVE_DEFINITE 1 - #define SYM_GENERAL_SYMMETRIC 2 + constexpr int USE_COMM_WORLD -987654 + constexpr int PAR_NOT_PARALLEL 0 + constexpr int PAR_PARALLEL 1 + + constexpr int JOB_INIT -1 + constexpr int JOB_END -2 + constexpr int JOB_REMOVE_SAVED_DATA -3 + constexpr int JOB_FREE_INTERNAL_DATA -4 + constexpr int JOB_SUPPRESS_OOC_FILES -200 + + constexpr int JOB_ANALYSIS_PHASE 1 + constexpr int JOB_FACTORIZATION_PHASE 2 + constexpr int JOB_COMPUTE_SOLUTION 3 + constexpr int JOB_ANALYSIS_AND_FACTORIZATION 4 + constexpr int JOB_FACTORIZATION_AND_SOLUTION 5 + constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION 6 + constexpr int JOB_SAVE_INTERNAL_DATA 7 + constexpr int JOB_RESTORE_INTERNAL_DATA 8 + constexpr int JOB_DISTRIBUTE_RHS 9 + + constexpr int SYM_UNSYMMETRIC 0 + constexpr int SYM_POSITIVE_DEFINITE 1 + constexpr int SYM_GENERAL_SYMMETRIC 2 #endif // --------------------------------------- // @@ -148,4 +148,4 @@ enum class BetaCoeff #define LIKWID_START(marker) #define LIKWID_STOP(marker) #define LIKWID_CLOSE() -#endif \ No newline at end of file +#endif From a42becb2a486bc918d496a9a6e7ad35816a66f2e Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Thu, 15 Jan 2026 16:25:47 +0100 Subject: [PATCH 02/10] Prefer inline functions for MUMPS index modification --- include/common/global_definitions.h | 62 ++++++++++++++++------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/include/common/global_definitions.h b/include/common/global_definitions.h index 01628bde..2460c73b 100644 --- a/include/common/global_definitions.h +++ b/include/common/global_definitions.h @@ -83,34 +83,40 @@ enum class BetaCoeff /* Mumps - Constant Definitions */ /* ---------------------------- */ #ifdef GMGPOLAR_USE_MUMPS - /* Mumps macro s.t. indices match documentation */ - #define ICNTL(I) icntl[(I) - 1] - #define CNTL(I) cntl[(I) - 1] - #define INFOG(I) infog[(I) - 1] - - constexpr int USE_COMM_WORLD -987654 - constexpr int PAR_NOT_PARALLEL 0 - constexpr int PAR_PARALLEL 1 - - constexpr int JOB_INIT -1 - constexpr int JOB_END -2 - constexpr int JOB_REMOVE_SAVED_DATA -3 - constexpr int JOB_FREE_INTERNAL_DATA -4 - constexpr int JOB_SUPPRESS_OOC_FILES -200 - - constexpr int JOB_ANALYSIS_PHASE 1 - constexpr int JOB_FACTORIZATION_PHASE 2 - constexpr int JOB_COMPUTE_SOLUTION 3 - constexpr int JOB_ANALYSIS_AND_FACTORIZATION 4 - constexpr int JOB_FACTORIZATION_AND_SOLUTION 5 - constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION 6 - constexpr int JOB_SAVE_INTERNAL_DATA 7 - constexpr int JOB_RESTORE_INTERNAL_DATA 8 - constexpr int JOB_DISTRIBUTE_RHS 9 - - constexpr int SYM_UNSYMMETRIC 0 - constexpr int SYM_POSITIVE_DEFINITE 1 - constexpr int SYM_GENERAL_SYMMETRIC 2 + /* Mumps inline functions s.t. indices match documentation */ + inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I) { + return mumps_solver.icntl[(I) - 1]; + } + inline int& CNTL(DMUMPS_STRUC_C& mumps_solver, int I) { + return mumps_solver.cntl[(I) - 1]; + } + inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I) { + return mumps_solver.infog[(I) - 1]; + } + + constexpr int USE_COMM_WORLD = -987654; + constexpr int PAR_NOT_PARALLEL = 0; + constexpr int PAR_PARALLEL = 1; + + constexpr int JOB_INIT = -1; + constexpr int JOB_END = -2; + constexpr int JOB_REMOVE_SAVED_DATA = -3; + constexpr int JOB_FREE_INTERNAL_DATA = -4; + constexpr int JOB_SUPPRESS_OOC_FILES = -200; + + constexpr int JOB_ANALYSIS_PHASE = 1; + constexpr int JOB_FACTORIZATION_PHASE = 2; + constexpr int JOB_COMPUTE_SOLUTION = 3; + constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4; + constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5; + constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6; + constexpr int JOB_SAVE_INTERNAL_DATA = 7; + constexpr int JOB_RESTORE_INTERNAL_DATA = 8; + constexpr int JOB_DISTRIBUTE_RHS = 9; + + constexpr int SYM_UNSYMMETRIC = 0; + constexpr int SYM_POSITIVE_DEFINITE = 1; + constexpr int SYM_GENERAL_SYMMETRIC = 2; #endif // --------------------------------------- // From 114a67ba68179fab4afa9f9a541677bf9a9d3e74 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Thu, 15 Jan 2026 16:27:36 +0100 Subject: [PATCH 03/10] Use functions --- include/common/global_definitions.h | 3 +- .../initializeMumps.cpp | 98 +++++++++---------- .../initializeMumps.cpp | 98 +++++++++---------- .../initializeMumps.cpp | 98 +++++++++---------- .../initializeMumps.cpp | 98 +++++++++---------- src/Smoother/SmootherGive/initializeMumps.cpp | 98 +++++++++---------- src/Smoother/SmootherTake/initializeMumps.cpp | 98 +++++++++---------- 7 files changed, 296 insertions(+), 295 deletions(-) diff --git a/include/common/global_definitions.h b/include/common/global_definitions.h index 2460c73b..c5730237 100644 --- a/include/common/global_definitions.h +++ b/include/common/global_definitions.h @@ -83,11 +83,12 @@ enum class BetaCoeff /* Mumps - Constant Definitions */ /* ---------------------------- */ #ifdef GMGPOLAR_USE_MUMPS +#include "dmumps_c.h" /* Mumps inline functions s.t. indices match documentation */ inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I) { return mumps_solver.icntl[(I) - 1]; } - inline int& CNTL(DMUMPS_STRUC_C& mumps_solver, int I) { + inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I) { return mumps_solver.cntl[(I) - 1]; } inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I) { diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp index 881ced38..4f212056 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp @@ -22,68 +22,68 @@ void DirectSolverGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, Spars mumps_solver.comm_fortran = USE_COMM_WORLD; dmumps_c(&mumps_solver); - mumps_solver.ICNTL(1) = 0; // Output stream for error messages. - mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host - mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages. - mumps_solver.ICNTL(5) = 0; // Controls the matrix input format - mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - mumps_solver.ICNTL(7) = + ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. + ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. + ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host + ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. + ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format + ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix + ICNTL(mumps_solver, 7) = 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy - mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T - mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution - mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved - mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used - mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node - mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space + ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy + ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T + ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution + ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved + ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used + ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node + ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space (solver_matrix.is_symmetric() ? 5 : 20); - mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads // ICNTL(17) Doesn't exist - mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix - mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix - mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can + ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix + ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix + ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides + ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. + ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. + ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can // allocate per working process - mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”. - mumps_solver.ICNTL(25) = + ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. + ICNTL(mumps_solver, 25) = 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides. - mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - mumps_solver.ICNTL(29) = + ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed + ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. + ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed + ICNTL(mumps_solver, 29) = 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization. - mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix. - mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3 - mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature - mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant - mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks - mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors - mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks + ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix + ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. + ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization + ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. + ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 + ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature + ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant + ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks + ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors + ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks // ICNTL(40-47) Don't exist - mumps_solver.ICNTL(48) = 1; // Multithreading with tree parallelism - mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase + ICNTL(mumps_solver, 48) = 1; // Multithreading with tree parallelism + ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase // ICNTL(50-55) Don't exist - mumps_solver.ICNTL(56) = + ICNTL(mumps_solver, 56) = 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method // ICNTL(57) Doesn't exist - mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization + ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization // ICNTL(59-60) Don't exist - mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting - mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement - mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows - mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting - mumps_solver.CNTL(5) = + CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting + CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement + CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows + CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting + CNTL(mumps_solver, 5) = 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active // CNTL(6) Doesn't exist - mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression + CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression // CNTL(8-15) Don't exist mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; @@ -95,7 +95,7 @@ void DirectSolverGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, Spars mumps_solver.a = solver_matrix.values_data(); dmumps_c(&mumps_solver); - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) { + if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { std::cout << "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase." << std::endl; diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp index 983d60ee..dd3d77dc 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp @@ -22,68 +22,68 @@ void DirectSolverTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, Spars mumps_solver.comm_fortran = USE_COMM_WORLD; dmumps_c(&mumps_solver); - mumps_solver.ICNTL(1) = 0; // Output stream for error messages. - mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host - mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages. - mumps_solver.ICNTL(5) = 0; // Controls the matrix input format - mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - mumps_solver.ICNTL(7) = + ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. + ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. + ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host + ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. + ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format + ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix + ICNTL(mumps_solver, 7) = 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy - mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T - mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution - mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved - mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used - mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node - mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space + ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy + ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T + ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution + ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved + ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used + ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node + ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space (solver_matrix.is_symmetric() ? 5 : 20); - mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads // ICNTL(17) Doesn't exist - mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix - mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix - mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can + ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix + ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix + ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides + ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. + ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. + ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can // allocate per working process - mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”. - mumps_solver.ICNTL(25) = + ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. + ICNTL(mumps_solver, 25) = 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides. - mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - mumps_solver.ICNTL(29) = + ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed + ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. + ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed + ICNTL(mumps_solver, 29) = 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization. - mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix. - mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3 - mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature - mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant - mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks - mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors - mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks + ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix + ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. + ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization + ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. + ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 + ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature + ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant + ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks + ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors + ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks // ICNTL(40-47) Don't exist - mumps_solver.ICNTL(48) = 1; // Multithreading with tree parallelism - mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase + ICNTL(mumps_solver, 48) = 1; // Multithreading with tree parallelism + ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase // ICNTL(50-55) Don't exist - mumps_solver.ICNTL(56) = + ICNTL(mumps_solver, 56) = 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method // ICNTL(57) Doesn't exist - mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization + ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization // ICNTL(59-60) Don't exist - mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting - mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement - mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows - mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting - mumps_solver.CNTL(5) = + CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting + CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement + CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows + CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting + CNTL(mumps_solver, 5) = 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active // CNTL(6) Doesn't exist - mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression + CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression // CNTL(8-15) Don't exist mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; @@ -95,7 +95,7 @@ void DirectSolverTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, Spars mumps_solver.a = solver_matrix.values_data(); dmumps_c(&mumps_solver); - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) { + if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { std::cout << "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase." << std::endl; diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp index 12c75472..907e1f7a 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp @@ -23,68 +23,68 @@ void ExtrapolatedSmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solve mumps_solver.comm_fortran = USE_COMM_WORLD; dmumps_c(&mumps_solver); - mumps_solver.ICNTL(1) = 0; // Output stream for error messages. - mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host - mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages. - mumps_solver.ICNTL(5) = 0; // Controls the matrix input format - mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - mumps_solver.ICNTL(7) = + ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. + ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. + ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host + ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. + ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format + ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix + ICNTL(mumps_solver, 7) = 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy - mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T - mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution - mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved - mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used - mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node - mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space + ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy + ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T + ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution + ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved + ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used + ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node + ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space (solver_matrix.is_symmetric() ? 5 : 20); - mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads // ICNTL(17) Doesn't exist - mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix - mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix - mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can + ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix + ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix + ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides + ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. + ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. + ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can // allocate per working process - mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”. - mumps_solver.ICNTL(25) = + ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. + ICNTL(mumps_solver, 25) = 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides. - mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - mumps_solver.ICNTL(29) = + ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed + ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. + ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed + ICNTL(mumps_solver, 29) = 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization. - mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix. - mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3 - mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature - mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant - mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks - mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors - mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks + ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix + ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. + ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization + ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. + ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 + ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature + ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant + ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks + ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors + ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks // ICNTL(40-47) Don't exist - mumps_solver.ICNTL(48) = 1; // Multithreading with tree parallelism - mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase + ICNTL(mumps_solver, 48) = 1; // Multithreading with tree parallelism + ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase // ICNTL(50-55) Don't exist - mumps_solver.ICNTL(56) = + ICNTL(mumps_solver, 56) = 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method // ICNTL(57) Doesn't exist - mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization + ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization // ICNTL(59-60) Don't exist - mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting - mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement - mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows - mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting - mumps_solver.CNTL(5) = + CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting + CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement + CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows + CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting + CNTL(mumps_solver, 5) = 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active // CNTL(6) Doesn't exist - mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression + CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression // CNTL(8-15) Don't exist mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; @@ -96,7 +96,7 @@ void ExtrapolatedSmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solve mumps_solver.a = solver_matrix.values_data(); dmumps_c(&mumps_solver); - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) { + if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in " "the factorization phase." << std::endl; diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp index 69d4236d..91ba4b1e 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp @@ -23,68 +23,68 @@ void ExtrapolatedSmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solve mumps_solver.comm_fortran = USE_COMM_WORLD; dmumps_c(&mumps_solver); - mumps_solver.ICNTL(1) = 0; // Output stream for error messages. - mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host - mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages. - mumps_solver.ICNTL(5) = 0; // Controls the matrix input format - mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - mumps_solver.ICNTL(7) = + ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. + ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. + ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host + ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. + ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format + ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix + ICNTL(mumps_solver, 7) = 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy - mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T - mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution - mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved - mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used - mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node - mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space + ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy + ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T + ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution + ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved + ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used + ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node + ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space (solver_matrix.is_symmetric() ? 5 : 20); - mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads // ICNTL(17) Doesn't exist - mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix - mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix - mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can + ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix + ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix + ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides + ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. + ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. + ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can // allocate per working process - mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”. - mumps_solver.ICNTL(25) = + ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. + ICNTL(mumps_solver, 25) = 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides. - mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - mumps_solver.ICNTL(29) = + ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed + ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. + ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed + ICNTL(mumps_solver, 29) = 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization. - mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix. - mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3 - mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature - mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant - mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks - mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors - mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks + ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix + ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. + ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization + ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. + ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 + ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature + ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant + ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks + ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors + ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks // ICNTL(40-47) Don't exist - mumps_solver.ICNTL(48) = 1; // Multithreading with tree parallelism - mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase + ICNTL(mumps_solver, 48) = 1; // Multithreading with tree parallelism + ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase // ICNTL(50-55) Don't exist - mumps_solver.ICNTL(56) = + ICNTL(mumps_solver, 56) = 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method // ICNTL(57) Doesn't exist - mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization + ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization // ICNTL(59-60) Don't exist - mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting - mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement - mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows - mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting - mumps_solver.CNTL(5) = + CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting + CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement + CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows + CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting + CNTL(mumps_solver, 5) = 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active // CNTL(6) Doesn't exist - mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression + CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression // CNTL(8-15) Don't exist mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; @@ -96,7 +96,7 @@ void ExtrapolatedSmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solve mumps_solver.a = solver_matrix.values_data(); dmumps_c(&mumps_solver); - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) { + if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in " "the factorization phase." << std::endl; diff --git a/src/Smoother/SmootherGive/initializeMumps.cpp b/src/Smoother/SmootherGive/initializeMumps.cpp index a8aa196c..ab2ec0df 100644 --- a/src/Smoother/SmootherGive/initializeMumps.cpp +++ b/src/Smoother/SmootherGive/initializeMumps.cpp @@ -22,68 +22,68 @@ void SmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMat mumps_solver.comm_fortran = USE_COMM_WORLD; dmumps_c(&mumps_solver); - mumps_solver.ICNTL(1) = 0; // Output stream for error messages. - mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host - mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages. - mumps_solver.ICNTL(5) = 0; // Controls the matrix input format - mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - mumps_solver.ICNTL(7) = + ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. + ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. + ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host + ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. + ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format + ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix + ICNTL(mumps_solver, 7) = 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy - mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T - mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution - mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved - mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used - mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node - mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space + ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy + ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T + ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution + ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved + ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used + ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node + ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space (solver_matrix.is_symmetric() ? 5 : 20); - mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads // ICNTL(17) Doesn't exist - mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix - mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix - mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can + ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix + ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix + ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides + ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. + ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. + ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can // allocate per working process - mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”. - mumps_solver.ICNTL(25) = + ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. + ICNTL(mumps_solver, 25) = 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides. - mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - mumps_solver.ICNTL(29) = + ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed + ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. + ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed + ICNTL(mumps_solver, 29) = 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization. - mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix. - mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3 - mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature - mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant - mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks - mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors - mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks + ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix + ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. + ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization + ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. + ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 + ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature + ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant + ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks + ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors + ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks // ICNTL(40-47) Don't exist - mumps_solver.ICNTL(48) = 1; // Multithreading with tree parallelism - mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase + ICNTL(mumps_solver, 48) = 1; // Multithreading with tree parallelism + ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase // ICNTL(50-55) Don't exist - mumps_solver.ICNTL(56) = + ICNTL(mumps_solver, 56) = 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method // ICNTL(57) Doesn't exist - mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization + ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization // ICNTL(59-60) Don't exist - mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting - mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement - mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows - mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting - mumps_solver.CNTL(5) = + CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting + CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement + CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows + CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting + CNTL(mumps_solver, 5) = 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active // CNTL(6) Doesn't exist - mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression + CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression // CNTL(8-15) Don't exist mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; @@ -95,7 +95,7 @@ void SmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMat mumps_solver.a = solver_matrix.values_data(); dmumps_c(&mumps_solver); - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) { + if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the " "factorization phase." << std::endl; diff --git a/src/Smoother/SmootherTake/initializeMumps.cpp b/src/Smoother/SmootherTake/initializeMumps.cpp index f7496ed0..9d61f3a3 100644 --- a/src/Smoother/SmootherTake/initializeMumps.cpp +++ b/src/Smoother/SmootherTake/initializeMumps.cpp @@ -22,68 +22,68 @@ void SmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMat mumps_solver.comm_fortran = USE_COMM_WORLD; dmumps_c(&mumps_solver); - mumps_solver.ICNTL(1) = 0; // Output stream for error messages. - mumps_solver.ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - mumps_solver.ICNTL(3) = 0; // Output stream for global information, collected on the host - mumps_solver.ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages. - mumps_solver.ICNTL(5) = 0; // Controls the matrix input format - mumps_solver.ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - mumps_solver.ICNTL(7) = + ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. + ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. + ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host + ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. + ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format + ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix + ICNTL(mumps_solver, 7) = 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - mumps_solver.ICNTL(8) = 77; // Describes the scaling strategy - mumps_solver.ICNTL(9) = 1; // Computes the solution using A or A^T - mumps_solver.ICNTL(10) = 0; // Applies the iterative refinement to the computed solution - mumps_solver.ICNTL(11) = 0; // Computes statistics related to an error analysis of the linear system solved - mumps_solver.ICNTL(12) = 0; // Defines an ordering strategy for symmetric matrices and is used - mumps_solver.ICNTL(13) = 0; // Controls the parallelism of the root node - mumps_solver.ICNTL(14) = // Controls the percentage increase in the estimated working space + ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy + ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T + ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution + ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved + ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used + ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node + ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space (solver_matrix.is_symmetric() ? 5 : 20); - mumps_solver.ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - mumps_solver.ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads // ICNTL(17) Doesn't exist - mumps_solver.ICNTL(18) = 0; // Defines the strategy for the distributed input matrix - mumps_solver.ICNTL(19) = 0; // Computes the Schur complement matrix - mumps_solver.ICNTL(20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - mumps_solver.ICNTL(21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - mumps_solver.ICNTL(22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - mumps_solver.ICNTL(23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can + ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix + ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix + ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides + ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. + ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. + ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can // allocate per working process - mumps_solver.ICNTL(24) = 0; // Controls the detection of “null pivot rows”. - mumps_solver.ICNTL(25) = + ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. + ICNTL(mumps_solver, 25) = 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - mumps_solver.ICNTL(26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - mumps_solver.ICNTL(27) = -32; // Controls the blocking size for multiple right-hand sides. - mumps_solver.ICNTL(28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - mumps_solver.ICNTL(29) = + ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed + ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. + ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed + ICNTL(mumps_solver, 29) = 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - mumps_solver.ICNTL(30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - mumps_solver.ICNTL(31) = 0; // Indicates which factors may be discarded during the factorization. - mumps_solver.ICNTL(32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - mumps_solver.ICNTL(33) = 0; // Computes the determinant of the input matrix. - mumps_solver.ICNTL(34) = 0; // Controls the conservation of the OOC files during JOB= –3 - mumps_solver.ICNTL(35) = 0; // Controls the activation of the BLR feature - mumps_solver.ICNTL(36) = 0; // Controls the choice of BLR factorization variant - mumps_solver.ICNTL(37) = 0; // Controls the BLR compression of the contribution blocks - mumps_solver.ICNTL(38) = 600; // Estimates compression rate of LU factors - mumps_solver.ICNTL(39) = 500; // Estimates compression rate of contribution blocks + ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix + ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. + ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization + ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. + ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 + ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature + ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant + ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks + ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors + ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks // ICNTL(40-47) Don't exist - mumps_solver.ICNTL(48) = 1; // Multithreading with tree parallelism - mumps_solver.ICNTL(49) = 0; // Compact workarray id%S at the end of factorization phase + ICNTL(mumps_solver, 48) = 1; // Multithreading with tree parallelism + ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase // ICNTL(50-55) Don't exist - mumps_solver.ICNTL(56) = + ICNTL(mumps_solver, 56) = 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method // ICNTL(57) Doesn't exist - mumps_solver.ICNTL(58) = 2; // Defines options for symbolic factorization + ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization // ICNTL(59-60) Don't exist - mumps_solver.CNTL(1) = -1.0; // Relative threshold for numerical pivoting - mumps_solver.CNTL(2) = -1.0; // Stopping criterion for iterative refinement - mumps_solver.CNTL(3) = 0.0; // Determine null pivot rows - mumps_solver.CNTL(4) = -1.0; // Determines the threshold for static pivoting - mumps_solver.CNTL(5) = + CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting + CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement + CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows + CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting + CNTL(mumps_solver, 5) = 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active // CNTL(6) Doesn't exist - mumps_solver.CNTL(7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression + CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression // CNTL(8-15) Don't exist mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; @@ -95,7 +95,7 @@ void SmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMat mumps_solver.a = solver_matrix.values_data(); dmumps_c(&mumps_solver); - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && mumps_solver.INFOG(12) != 0) { + if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the " "factorization phase." << std::endl; From 54a1a1e407b7a216dba767468af2a8d2e33e7d05 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Thu, 15 Jan 2026 16:40:39 +0100 Subject: [PATCH 04/10] Replace NODE_APPLY_A_GIVE macro with an inline function. Put repeated lines into the function --- src/Residual/ResidualGive/applyAGive.cpp | 497 +++++++++++------------ 1 file changed, 246 insertions(+), 251 deletions(-) diff --git a/src/Residual/ResidualGive/applyAGive.cpp b/src/Residual/ResidualGive/applyAGive.cpp index 5bb7429a..4fbf19b7 100644 --- a/src/Residual/ResidualGive/applyAGive.cpp +++ b/src/Residual/ResidualGive/applyAGive.cpp @@ -2,257 +2,256 @@ #include "../../../include/common/geometry_helper.h" -#define NODE_APPLY_A_GIVE(i_r, i_theta, r, theta, grid, DirBC_Interior, result, x, arr, att, art, detDF, coeff_beta) \ - do { \ - /* -------------------- */ \ - /* Node in the interior */ \ - /* -------------------- */ \ - if (i_r > 1 && i_r < grid.nr() - 2) { \ - double h1 = grid.radialSpacing(i_r - 1); \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - double coeff3 = 0.5 * (h1 + h2) / k1; \ - double coeff4 = 0.5 * (h1 + h2) / k2; \ - /* Fill result(i,j) */ \ - result[grid.index(i_r, i_theta)] -= \ - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * \ - x[grid.index(i_r, i_theta)] /* beta_{i,j} */ \ - - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ \ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ \ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ \ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ /* Center: (Left, Right, Bottom, Top) */ \ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * x[grid.index(i_r, i_theta)]); \ - /* Fill result(i-1,j) */ \ - result[grid.index(i_r - 1, i_theta)] -= \ - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ \ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ \ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ \ - /* Fill result(i+1,j) */ \ - result[grid.index(i_r + 1, i_theta)] -= \ - (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ \ - + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ \ - + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ \ - - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ \ - /* Fill result(i,j-1) */ \ - result[grid.index(i_r, i_theta - 1)] -= \ - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ \ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ \ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ \ - /* Fill result(i,j+1) */ \ - result[grid.index(i_r, i_theta + 1)] -= \ - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ \ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ \ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ \ - - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ \ - /* -------------------------- */ \ - /* Node on the inner boundary */ \ - /* -------------------------- */ \ - } \ - else if (i_r == 0) { \ - /* ------------------------------------------------ */ \ - /* Case 1: Dirichlet boundary on the inner boundary */ \ - /* ------------------------------------------------ */ \ - if (DirBC_Interior) { \ - /* Fill result(i,j) */ \ - result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; \ - /* Give value to the interior nodes! */ \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - /* Fill result(i+1,j) */ \ - result[grid.index(i_r + 1, i_theta)] -= \ - (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ \ - + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ \ - + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ \ - - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ \ - } \ - else { \ - /* ------------------------------------------------------------- */ \ - /* Case 2: Across origin discretization on the interior boundary */ \ - /* ------------------------------------------------------------- */ \ - /* h1 gets replaced with 2 * R0. */ \ - /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ \ - /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ \ - double h1 = 2.0 * grid.radius(0); \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - double coeff3 = 0.5 * (h1 + h2) / k1; \ - double coeff4 = 0.5 * (h1 + h2) / k2; \ - /* Fill result(i,j) */ \ - result[grid.index(i_r, i_theta)] -= \ - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * \ - x[grid.index(i_r, i_theta)] /* beta_{i,j} */ \ - - coeff1 * arr * x[grid.index(i_r, i_theta + (grid.ntheta() / 2))] /* Left */ \ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ \ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ \ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ \ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * \ - x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ \ - /* Fill result(i-1,j) */ \ - /* From view the view of the across origin node, the directions are roatated by 180 degrees in the stencil! */ \ - result[grid.index(i_r, i_theta + (grid.ntheta() / 2))] -= \ - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right -> Left */ \ - + coeff1 * arr * \ - x[grid.index(i_r, i_theta + (grid.ntheta() / 2))]); /* Center: (Right) -> Center: (Left)*/ \ - /* + 0.25 * art * x[grid.index(i_r,i_theta+1)]; // Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ \ - /* - 0.25 * art * x[grid.index(i_r,i_theta-1)]; // Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ \ - /* Fill result(i+1,j) */ \ - result[grid.index(i_r + 1, i_theta)] -= \ - (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ \ - + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ \ - + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ \ - - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ \ - /* Fill result(i,j-1) */ \ - result[grid.index(i_r, i_theta - 1)] -= \ - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ \ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ \ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Top Right */ \ - /* + 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ \ - /* Fill result(i,j+1) */ \ - result[grid.index(i_r, i_theta + 1)] -= \ - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ \ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ \ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Bottom Right */ \ - /* - 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ \ - } \ - /* ------------------------------- */ \ - /* Node next to the inner boundary */ \ - /* ------------------------------- */ \ - } \ - else if (i_r == 1) { \ - double h1 = grid.radialSpacing(i_r - 1); \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - double coeff3 = 0.5 * (h1 + h2) / k1; \ - double coeff4 = 0.5 * (h1 + h2) / k2; \ - /* Fill result(i,j) */ \ - result[grid.index(i_r, i_theta)] -= \ - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * \ - x[grid.index(i_r, i_theta)] /* beta_{i,j} */ \ - - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ \ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ \ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ \ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ \ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * \ - x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ \ - /* Fill result(i-1,j) */ \ - if (!DirBC_Interior) { /* Don't give to the inner dirichlet boundary! */ \ - result[grid.index(i_r - 1, i_theta)] -= \ - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ \ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ \ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ \ - } \ - /* Fill result(i+1,j) */ \ - result[grid.index(i_r + 1, i_theta)] -= \ - (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ \ - + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ \ - + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ \ - - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ \ - /* Fill result(i,j-1) */ \ - result[grid.index(i_r, i_theta - 1)] -= \ - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ \ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ \ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ \ - /* Fill result(i,j+1) */ \ - result[grid.index(i_r, i_theta + 1)] -= \ - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ \ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ \ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ \ - - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ \ - /* ------------------------------- */ \ - /* Node next to the outer boundary */ \ - /* ------------------------------- */ \ - } \ - else if (i_r == grid.nr() - 2) { \ - double h1 = grid.radialSpacing(i_r - 1); \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - double coeff3 = 0.5 * (h1 + h2) / k1; \ - double coeff4 = 0.5 * (h1 + h2) / k2; \ - /* Fill result(i,j) */ \ - result[grid.index(i_r, i_theta)] -= \ - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * \ - x[grid.index(i_r, i_theta)] /* beta_{i,j} */ \ - - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ \ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ \ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ \ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ \ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * \ - x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ \ - /* Fill result(i-1,j) */ \ - result[grid.index(i_r - 1, i_theta)] -= \ - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ \ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ \ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ \ - /* Don't give to the outer dirichlet boundary! */ \ - /* Fill result(i+1,j) */ \ - /* result[grid.index(i_r+1,i_theta)] -= ( */ \ - /* - coeff2 * arr * x[grid.index(i_r,i_theta)] // Left */ \ - /* + coeff2 * arr * x[grid.index(i_r+1,i_theta)] // Center: (Left) */ \ - /* + 0.25 * art * x[grid.index(i_r,i_theta+1)] // Top Left */ \ - /* - 0.25 * art * x[grid.index(i_r,i_theta-1)] ); // Bottom Left */ \ - /* Fill result(i,j-1) */ \ - result[grid.index(i_r, i_theta - 1)] -= \ - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ \ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ \ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ \ - /* Fill result(i,j+1) */ \ - result[grid.index(i_r, i_theta + 1)] -= \ - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ \ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ \ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ \ - - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ \ - /* ----------------------------- */ \ - /* Node on to the outer boundary */ \ - /* ----------------------------- */ \ - } \ - else if (i_r == grid.nr() - 1) { \ - /* Fill result of (i,j) */ \ - result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; \ - /* Give value to the interior nodes! */ \ - double h1 = grid.radialSpacing(i_r - 1); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - /* Fill result(i-1,j) */ \ - result[grid.index(i_r - 1, i_theta)] -= \ - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ \ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ \ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ \ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ \ - } \ - } while (0) +inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, const PolarGrid& grid, const LevelCache& level_cache, bool DirBC_Interior, + Vector result, ConstVector x) +{ + const int global_index = grid.index(i_r, i_theta); + double coeff_beta, arr, att, art, detDF; + level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + do { + /* -------------------- */ + /* Node in the interior */ + /* -------------------- */ + if (i_r > 1 && i_r < grid.nr() - 2) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ /* Center: (Left, Right, Bottom, Top) */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * x[grid.index(i_r, i_theta)]); + /* Fill result(i-1,j) */ + result[grid.index(i_r - 1, i_theta)] -= + (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + /* Fill result(i+1,j) */ + result[grid.index(i_r + 1, i_theta)] -= + (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ + - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= + (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= + (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ + - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ + /* -------------------------- */ + /* Node on the inner boundary */ + /* -------------------------- */ + } + else if (i_r == 0) { + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; + /* Give value to the interior nodes! */ + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff2 = 0.5 * (k1 + k2) / h2; + /* Fill result(i+1,j) */ + result[grid.index(i_r + 1, i_theta)] -= + (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ + - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + /* h1 gets replaced with 2 * R0. */ + /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ + /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ + double h1 = 2.0 * grid.radius(0); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * + x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r, i_theta + (grid.ntheta() / 2))] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * + x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ + /* Fill result(i-1,j) */ + /* From view the view of the across origin node, the directions are roatated by 180 degrees in the stencil! */ + result[grid.index(i_r, i_theta + (grid.ntheta() / 2))] -= + (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right -> Left */ + + coeff1 * arr * + x[grid.index(i_r, i_theta + (grid.ntheta() / 2))]); /* Center: (Right) -> Center: (Left)*/ + /* + 0.25 * art * x[grid.index(i_r,i_theta+1)]; // Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + /* - 0.25 * art * x[grid.index(i_r,i_theta-1)]; // Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + /* Fill result(i+1,j) */ + result[grid.index(i_r + 1, i_theta)] -= + (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ + - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= + (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Top Right */ + /* + 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= + (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Bottom Right */ + /* - 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + } + /* ------------------------------- */ + /* Node next to the inner boundary */ + /* ------------------------------- */ + } + else if (i_r == 1) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * + x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ + /* Fill result(i-1,j) */ + if (!DirBC_Interior) { /* Don't give to the inner dirichlet boundary! */ + result[grid.index(i_r - 1, i_theta)] -= + (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + } + /* Fill result(i+1,j) */ + result[grid.index(i_r + 1, i_theta)] -= + (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ + - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= + (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= + (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ + - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ + /* ------------------------------- */ + /* Node next to the outer boundary */ + /* ------------------------------- */ + } + else if (i_r == grid.nr() - 2) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * + x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ + /* Fill result(i-1,j) */ + result[grid.index(i_r - 1, i_theta)] -= + (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + /* Don't give to the outer dirichlet boundary! */ + /* Fill result(i+1,j) */ + /* result[grid.index(i_r+1,i_theta)] -= ( */ + /* - coeff2 * arr * x[grid.index(i_r,i_theta)] // Left */ + /* + coeff2 * arr * x[grid.index(i_r+1,i_theta)] // Center: (Left) */ + /* + 0.25 * art * x[grid.index(i_r,i_theta+1)] // Top Left */ + /* - 0.25 * art * x[grid.index(i_r,i_theta-1)] ); // Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= + (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= + (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ + - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ + /* ----------------------------- */ + /* Node on to the outer boundary */ + /* ----------------------------- */ + } + else if (i_r == grid.nr() - 1) { + /* Fill result of (i,j) */ + result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; + /* Give value to the interior nodes! */ + double h1 = grid.radialSpacing(i_r - 1); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + /* Fill result(i-1,j) */ + result[grid.index(i_r - 1, i_theta)] -= + (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + } + } while (0); +} void ResidualGive::applyCircleSection(const int i_r, Vector result, ConstVector x) const { const double r = grid_.radius(i_r); for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - const int global_index = grid_.index(i_r, i_theta); - const double theta = grid_.theta(i_theta); + const double theta = grid_.theta(i_theta); - double coeff_beta, arr, att, art, detDF; - level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); - - NODE_APPLY_A_GIVE(i_r, i_theta, r, theta, grid_, DirBC_Interior_, result, x, arr, att, art, detDF, coeff_beta); + node_apply_a_give(i_r, i_theta, r, theta, grid_, level_cache_, DirBC_Interior_, result, x); } } @@ -260,12 +259,8 @@ void ResidualGive::applyRadialSection(const int i_theta, Vector result, { const double theta = grid_.theta(i_theta); for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - const int global_index = grid_.index(i_r, i_theta); - const double r = grid_.radius(i_r); - - double coeff_beta, arr, att, art, detDF; - level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + const double r = grid_.radius(i_r); - NODE_APPLY_A_GIVE(i_r, i_theta, r, theta, grid_, DirBC_Interior_, result, x, arr, att, art, detDF, coeff_beta); + node_apply_a_give(i_r, i_theta, r, theta, grid_, level_cache_, DirBC_Interior_, result, x); } } From 0426ed13d76a7fb593953a9e8f16870e995f453b Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Thu, 15 Jan 2026 16:52:14 +0100 Subject: [PATCH 05/10] Replace NODE_APPLY_RESIDUAL_TAKE macro with an inline function --- .../ResidualTake/applyResidualTake.cpp | 253 +++++++++--------- 1 file changed, 129 insertions(+), 124 deletions(-) diff --git a/src/Residual/ResidualTake/applyResidualTake.cpp b/src/Residual/ResidualTake/applyResidualTake.cpp index 258d2679..c3bfee39 100644 --- a/src/Residual/ResidualTake/applyResidualTake.cpp +++ b/src/Residual/ResidualTake/applyResidualTake.cpp @@ -1,116 +1,121 @@ #include "../../../include/Residual/ResidualTake/residualTake.h" -#define NODE_APPLY_RESIDUAL_TAKE(i_r, i_theta, grid, DirBC_Interior, result, rhs, x, arr, att, art, detDF, coeff_beta) \ - do { \ - /* -------------------- */ \ - /* Node in the interior */ \ - /* -------------------- */ \ - if (i_r > 0 && i_r < grid.nr() - 1) { \ - double h1 = grid.radialSpacing(i_r - 1); \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - double coeff3 = 0.5 * (h1 + h2) / k1; \ - double coeff4 = 0.5 * (h1 + h2) / k2; \ - \ - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); \ - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); \ - \ - const int bottom_left = grid.index(i_r - 1, i_theta_M1); \ - const int left = grid.index(i_r - 1, i_theta); \ - const int top_left = grid.index(i_r - 1, i_theta_P1); \ - const int bottom = grid.index(i_r, i_theta_M1); \ - const int center = grid.index(i_r, i_theta); \ - const int top = grid.index(i_r, i_theta_P1); \ - const int bottom_right = grid.index(i_r + 1, i_theta_M1); \ - const int right = grid.index(i_r + 1, i_theta); \ - const int top_right = grid.index(i_r + 1, i_theta_P1); \ - \ - result[center] = \ - rhs[center] - \ - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) * x[center] /* beta_{i,j} */ \ - \ - - coeff1 * (arr[center] + arr[left]) * (x[left] - x[center]) /* Left - Center: (Left) */ \ - - coeff2 * (arr[center] + arr[right]) * (x[right] - x[center]) /* Right - Center: (Right) */ \ - - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ \ - - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ \ - \ - - 0.25 * (art[left] + art[bottom]) * x[bottom_left] /* Bottom Left */ \ - + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ \ - + 0.25 * (art[left] + art[top]) * x[top_left] /* Top Left */ \ - - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ \ - ); \ - /* -------------------------- */ \ - /* Node on the inner boundary */ \ - /* -------------------------- */ \ - } \ - else if (i_r == 0) { \ - /* ------------------------------------------------ */ \ - /* Case 1: Dirichlet boundary on the inner boundary */ \ - /* ------------------------------------------------ */ \ - if (DirBC_Interior) { \ - const int center = grid.index(i_r, i_theta); \ - result[center] = rhs[center] - x[center]; \ - } \ - else { \ - /* ------------------------------------------------------------- */ \ - /* Case 2: Across origin discretization on the interior boundary */ \ - /* ------------------------------------------------------------- */ \ - /* h1 gets replaced with 2 * R0. */ \ - /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ \ - /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ \ - double h1 = 2.0 * grid.radius(0); \ - double h2 = grid.radialSpacing(i_r); \ - double k1 = grid.angularSpacing(i_theta - 1); \ - double k2 = grid.angularSpacing(i_theta); \ - \ - double coeff1 = 0.5 * (k1 + k2) / h1; \ - double coeff2 = 0.5 * (k1 + k2) / h2; \ - double coeff3 = 0.5 * (h1 + h2) / k1; \ - double coeff4 = 0.5 * (h1 + h2) / k2; \ - \ - const int i_theta_M1 = grid_.wrapThetaIndex(i_theta - 1); \ - const int i_theta_P1 = grid_.wrapThetaIndex(i_theta + 1); \ - const int i_theta_Across = grid_.wrapThetaIndex(i_theta + grid_.ntheta() / 2); \ - \ - const int left = grid_.index(i_r, i_theta_Across); \ - const int bottom = grid_.index(i_r, i_theta_M1); \ - const int center = grid.index(i_r, i_theta); \ - const int top = grid_.index(i_r, i_theta_P1); \ - const int bottom_right = grid_.index(i_r + 1, i_theta_M1); \ - const int right = grid_.index(i_r + 1, i_theta); \ - const int top_right = grid_.index(i_r + 1, i_theta_P1); \ - \ - result[center] = \ - rhs[center] - \ - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) * \ - x[center] /* beta_{i,j} */ \ - \ - - coeff1 * (arr[center] + arr[left]) * (x[left] - x[center]) /* Left - Center: (Left) */ \ - - coeff2 * (arr[center] + arr[right]) * (x[right] - x[center]) /* Right - Center: (Right) */ \ - - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ \ - - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ \ - \ - /* - 0.25 * (art[left] + art[bottom]) * x[bottom_left] // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ \ - + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ \ - \ - /* + 0.25 * (art[left] + art[top]) * x[top_left] // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ \ - - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ \ - ); \ - } \ - /* ----------------------------- */ \ - /* Node on to the outer boundary */ \ - /* ----------------------------- */ \ - } \ - else if (i_r == grid.nr() - 1) { \ - /* Fill result of (i,j) */ \ - const int center = grid.index(i_r, i_theta); \ - result[center] = rhs[center] - x[center]; \ - } \ - } while (0) +inline void node_apply_residual_take(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + Vector result, ConstVector rhs, ConstVector x, + ConstVector arr, ConstVector att, ConstVector art, + ConstVector detDF, ConstVector coeff_beta) +{ + do { + /* -------------------- */ + /* Node in the interior */ + /* -------------------- */ + if (i_r > 0 && i_r < grid.nr() - 1) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int bottom_left = grid.index(i_r - 1, i_theta_M1); + const int left = grid.index(i_r - 1, i_theta); + const int top_left = grid.index(i_r - 1, i_theta_P1); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int bottom_right = grid.index(i_r + 1, i_theta_M1); + const int right = grid.index(i_r + 1, i_theta); + const int top_right = grid.index(i_r + 1, i_theta_P1); + + result[center] = + rhs[center] - + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) * x[center] /* beta_{i,j} */ + + - coeff1 * (arr[center] + arr[left]) * (x[left] - x[center]) /* Left - Center: (Left) */ + - coeff2 * (arr[center] + arr[right]) * (x[right] - x[center]) /* Right - Center: (Right) */ + - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ + - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ + + - 0.25 * (art[left] + art[bottom]) * x[bottom_left] /* Bottom Left */ + + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ + + 0.25 * (art[left] + art[top]) * x[top_left] /* Top Left */ + - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ + ); + /* -------------------------- */ + /* Node on the inner boundary */ + /* -------------------------- */ + } + else if (i_r == 0) { + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + const int center = grid.index(i_r, i_theta); + result[center] = rhs[center] - x[center]; + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + /* h1 gets replaced with 2 * R0. */ + /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ + /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ + double h1 = 2.0 * grid.radius(0); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_Across = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); + + const int left = grid.index(i_r, i_theta_Across); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int bottom_right = grid.index(i_r + 1, i_theta_M1); + const int right = grid.index(i_r + 1, i_theta); + const int top_right = grid.index(i_r + 1, i_theta_P1); + + result[center] = + rhs[center] - + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) * + x[center] /* beta_{i,j} */ + + - coeff1 * (arr[center] + arr[left]) * (x[left] - x[center]) /* Left - Center: (Left) */ + - coeff2 * (arr[center] + arr[right]) * (x[right] - x[center]) /* Right - Center: (Right) */ + - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ + - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ + + /* - 0.25 * (art[left] + art[bottom]) * x[bottom_left] // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ + + /* + 0.25 * (art[left] + art[top]) * x[top_left] // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ + ); + } + /* ----------------------------- */ + /* Node on to the outer boundary */ + /* ----------------------------- */ + } + else if (i_r == grid.nr() - 1) { + /* Fill result of (i,j) */ + const int center = grid.index(i_r, i_theta); + result[center] = rhs[center] - x[center]; + } + } while (0); +} void ResidualTake::applyCircleSection(const int i_r, Vector result, ConstVector rhs, ConstVector x) const @@ -118,14 +123,14 @@ void ResidualTake::applyCircleSection(const int i_r, Vector result, Cons assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); - const auto& arr = level_cache_.arr(); - const auto& att = level_cache_.att(); - const auto& art = level_cache_.art(); - const auto& detDF = level_cache_.detDF(); - const auto& coeff_beta = level_cache_.coeff_beta(); + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - NODE_APPLY_RESIDUAL_TAKE(i_r, i_theta, grid_, DirBC_Interior_, result, rhs, x, arr, att, art, detDF, + node_apply_residual_take(i_r, i_theta, grid_, DirBC_Interior_, result, rhs, x, arr, att, art, detDF, coeff_beta); } } @@ -136,14 +141,14 @@ void ResidualTake::applyRadialSection(const int i_theta, Vector result, assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); - const auto& arr = level_cache_.arr(); - const auto& att = level_cache_.att(); - const auto& art = level_cache_.art(); - const auto& detDF = level_cache_.detDF(); - const auto& coeff_beta = level_cache_.coeff_beta(); + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - NODE_APPLY_RESIDUAL_TAKE(i_r, i_theta, grid_, DirBC_Interior_, result, rhs, x, arr, att, art, detDF, + node_apply_residual_take(i_r, i_theta, grid_, DirBC_Interior_, result, rhs, x, arr, att, art, detDF, coeff_beta); } -} \ No newline at end of file +} From 28787c68c446ba459d090d321222749d8c32b406 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Mon, 26 Jan 2026 08:50:49 +0100 Subject: [PATCH 06/10] Use references for inline functions for performance --- src/Residual/ResidualGive/applyAGive.cpp | 2 +- src/Residual/ResidualTake/applyResidualTake.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Residual/ResidualGive/applyAGive.cpp b/src/Residual/ResidualGive/applyAGive.cpp index 4fbf19b7..ac3aebd4 100644 --- a/src/Residual/ResidualGive/applyAGive.cpp +++ b/src/Residual/ResidualGive/applyAGive.cpp @@ -3,7 +3,7 @@ #include "../../../include/common/geometry_helper.h" inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, const PolarGrid& grid, const LevelCache& level_cache, bool DirBC_Interior, - Vector result, ConstVector x) + Vector& result, ConstVector& x) { const int global_index = grid.index(i_r, i_theta); double coeff_beta, arr, att, art, detDF; diff --git a/src/Residual/ResidualTake/applyResidualTake.cpp b/src/Residual/ResidualTake/applyResidualTake.cpp index c3bfee39..4fb4afe2 100644 --- a/src/Residual/ResidualTake/applyResidualTake.cpp +++ b/src/Residual/ResidualTake/applyResidualTake.cpp @@ -1,9 +1,9 @@ #include "../../../include/Residual/ResidualTake/residualTake.h" inline void node_apply_residual_take(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - Vector result, ConstVector rhs, ConstVector x, - ConstVector arr, ConstVector att, ConstVector art, - ConstVector detDF, ConstVector coeff_beta) + Vector& result, ConstVector& rhs, ConstVector& x, + ConstVector& arr, ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta) { do { /* -------------------- */ From 06d53ab179cd9d874a82dfcaade63c1b158e121a Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Mon, 26 Jan 2026 09:50:12 +0100 Subject: [PATCH 07/10] Clang format --- include/common/global_definitions.h | 73 ++++++++++++------------ src/Residual/ResidualGive/applyAGive.cpp | 5 +- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/include/common/global_definitions.h b/include/common/global_definitions.h index c5730237..2f4f1bef 100644 --- a/include/common/global_definitions.h +++ b/include/common/global_definitions.h @@ -83,41 +83,44 @@ enum class BetaCoeff /* Mumps - Constant Definitions */ /* ---------------------------- */ #ifdef GMGPOLAR_USE_MUMPS -#include "dmumps_c.h" - /* Mumps inline functions s.t. indices match documentation */ - inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I) { - return mumps_solver.icntl[(I) - 1]; - } - inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I) { - return mumps_solver.cntl[(I) - 1]; - } - inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I) { - return mumps_solver.infog[(I) - 1]; - } - - constexpr int USE_COMM_WORLD = -987654; - constexpr int PAR_NOT_PARALLEL = 0; - constexpr int PAR_PARALLEL = 1; - - constexpr int JOB_INIT = -1; - constexpr int JOB_END = -2; - constexpr int JOB_REMOVE_SAVED_DATA = -3; - constexpr int JOB_FREE_INTERNAL_DATA = -4; - constexpr int JOB_SUPPRESS_OOC_FILES = -200; - - constexpr int JOB_ANALYSIS_PHASE = 1; - constexpr int JOB_FACTORIZATION_PHASE = 2; - constexpr int JOB_COMPUTE_SOLUTION = 3; - constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4; - constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5; - constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6; - constexpr int JOB_SAVE_INTERNAL_DATA = 7; - constexpr int JOB_RESTORE_INTERNAL_DATA = 8; - constexpr int JOB_DISTRIBUTE_RHS = 9; - - constexpr int SYM_UNSYMMETRIC = 0; - constexpr int SYM_POSITIVE_DEFINITE = 1; - constexpr int SYM_GENERAL_SYMMETRIC = 2; + #include "dmumps_c.h" +/* Mumps inline functions s.t. indices match documentation */ +inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I) +{ + return mumps_solver.icntl[(I)-1]; +} +inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I) +{ + return mumps_solver.cntl[(I)-1]; +} +inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I) +{ + return mumps_solver.infog[(I)-1]; +} + +constexpr int USE_COMM_WORLD = -987654; +constexpr int PAR_NOT_PARALLEL = 0; +constexpr int PAR_PARALLEL = 1; + +constexpr int JOB_INIT = -1; +constexpr int JOB_END = -2; +constexpr int JOB_REMOVE_SAVED_DATA = -3; +constexpr int JOB_FREE_INTERNAL_DATA = -4; +constexpr int JOB_SUPPRESS_OOC_FILES = -200; + +constexpr int JOB_ANALYSIS_PHASE = 1; +constexpr int JOB_FACTORIZATION_PHASE = 2; +constexpr int JOB_COMPUTE_SOLUTION = 3; +constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4; +constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5; +constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6; +constexpr int JOB_SAVE_INTERNAL_DATA = 7; +constexpr int JOB_RESTORE_INTERNAL_DATA = 8; +constexpr int JOB_DISTRIBUTE_RHS = 9; + +constexpr int SYM_UNSYMMETRIC = 0; +constexpr int SYM_POSITIVE_DEFINITE = 1; +constexpr int SYM_GENERAL_SYMMETRIC = 2; #endif // --------------------------------------- // diff --git a/src/Residual/ResidualGive/applyAGive.cpp b/src/Residual/ResidualGive/applyAGive.cpp index ac3aebd4..b072b008 100644 --- a/src/Residual/ResidualGive/applyAGive.cpp +++ b/src/Residual/ResidualGive/applyAGive.cpp @@ -2,8 +2,9 @@ #include "../../../include/common/geometry_helper.h" -inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, const PolarGrid& grid, const LevelCache& level_cache, bool DirBC_Interior, - Vector& result, ConstVector& x) +inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, const PolarGrid& grid, + const LevelCache& level_cache, bool DirBC_Interior, Vector& result, + ConstVector& x) { const int global_index = grid.index(i_r, i_theta); double coeff_beta, arr, att, art, detDF; From 00b17ba4a627864b57498728625abd4ec7e6de7e Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Mon, 26 Jan 2026 15:47:35 +0100 Subject: [PATCH 08/10] Add GNU compiler flag to encourage inlining --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4200980..2b00a970 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,9 @@ endif() # Set compiler flags set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -pedantic -Wno-unused -Wno-psabi -Wfloat-conversion") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -mtune=generic -Wno-psabi") +if (CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} --param max-inline-insns-single=1500") +endif() # Set coverage compiler flags - must come before any targets are defined if(GMGPOLAR_ENABLE_COVERAGE) From 77ae2be1a5c505c67a418b27b118aa432bdc2bd5 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Mon, 26 Jan 2026 15:51:29 +0100 Subject: [PATCH 09/10] Remove unnecessary do{}while(0); loops --- src/Residual/ResidualGive/applyAGive.cpp | 360 +++++++++--------- .../ResidualTake/applyResidualTake.cpp | 157 ++++---- 2 files changed, 250 insertions(+), 267 deletions(-) diff --git a/src/Residual/ResidualGive/applyAGive.cpp b/src/Residual/ResidualGive/applyAGive.cpp index b072b008..3e2ed8e2 100644 --- a/src/Residual/ResidualGive/applyAGive.cpp +++ b/src/Residual/ResidualGive/applyAGive.cpp @@ -9,132 +9,77 @@ inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, cons const int global_index = grid.index(i_r, i_theta); double coeff_beta, arr, att, art, detDF; level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); - do { - /* -------------------- */ - /* Node in the interior */ - /* -------------------- */ - if (i_r > 1 && i_r < grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); + /* -------------------- */ + /* Node in the interior */ + /* -------------------- */ + if (i_r > 1 && i_r < grid.nr() - 2) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ /* Center: (Left, Right, Bottom, Top) */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * x[grid.index(i_r, i_theta)]); + /* Fill result(i-1,j) */ + result[grid.index(i_r - 1, i_theta)] -= (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + /* Fill result(i+1,j) */ + result[grid.index(i_r + 1, i_theta)] -= (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ + - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ + - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ + /* -------------------------- */ + /* Node on the inner boundary */ + /* -------------------------- */ + } + else if (i_r == 0) { + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; + /* Give value to the interior nodes! */ double h2 = grid.radialSpacing(i_r); double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - /* Fill result(i,j) */ - result[grid.index(i_r, i_theta)] -= - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ - - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ /* Center: (Left, Right, Bottom, Top) */ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * x[grid.index(i_r, i_theta)]); - /* Fill result(i-1,j) */ - result[grid.index(i_r - 1, i_theta)] -= - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ /* Fill result(i+1,j) */ result[grid.index(i_r + 1, i_theta)] -= (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ - /* Fill result(i,j-1) */ - result[grid.index(i_r, i_theta - 1)] -= - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ - + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ - /* Fill result(i,j+1) */ - result[grid.index(i_r, i_theta + 1)] -= - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ - - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ - /* -------------------------- */ - /* Node on the inner boundary */ - /* -------------------------- */ } - else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - /* Fill result(i,j) */ - result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; - /* Give value to the interior nodes! */ - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - double coeff2 = 0.5 * (k1 + k2) / h2; - /* Fill result(i+1,j) */ - result[grid.index(i_r + 1, i_theta)] -= - (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ - + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ - + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ - - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - /* h1 gets replaced with 2 * R0. */ - /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ - /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ - double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - /* Fill result(i,j) */ - result[grid.index(i_r, i_theta)] -= - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * - x[grid.index(i_r, i_theta)] /* beta_{i,j} */ - - coeff1 * arr * x[grid.index(i_r, i_theta + (grid.ntheta() / 2))] /* Left */ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * - x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ - /* Fill result(i-1,j) */ - /* From view the view of the across origin node, the directions are roatated by 180 degrees in the stencil! */ - result[grid.index(i_r, i_theta + (grid.ntheta() / 2))] -= - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right -> Left */ - + coeff1 * arr * - x[grid.index(i_r, i_theta + (grid.ntheta() / 2))]); /* Center: (Right) -> Center: (Left)*/ - /* + 0.25 * art * x[grid.index(i_r,i_theta+1)]; // Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - /* - 0.25 * art * x[grid.index(i_r,i_theta-1)]; // Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - /* Fill result(i+1,j) */ - result[grid.index(i_r + 1, i_theta)] -= - (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ - + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ - + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ - - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ - /* Fill result(i,j-1) */ - result[grid.index(i_r, i_theta - 1)] -= - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Top Right */ - /* + 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - /* Fill result(i,j+1) */ - result[grid.index(i_r, i_theta + 1)] -= - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Bottom Right */ - /* - 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - } - /* ------------------------------- */ - /* Node next to the inner boundary */ - /* ------------------------------- */ - } - else if (i_r == 1) { - double h1 = grid.radialSpacing(i_r - 1); + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + /* h1 gets replaced with 2 * R0. */ + /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ + /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ + double h1 = 2.0 * grid.radius(0); double h2 = grid.radialSpacing(i_r); double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); @@ -145,20 +90,20 @@ inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, cons /* Fill result(i,j) */ result[grid.index(i_r, i_theta)] -= (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ - - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff1 * arr * x[grid.index(i_r, i_theta + (grid.ntheta() / 2))] /* Left */ - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ /* Fill result(i-1,j) */ - if (!DirBC_Interior) { /* Don't give to the inner dirichlet boundary! */ - result[grid.index(i_r - 1, i_theta)] -= - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ - } + /* From view the view of the across origin node, the directions are roatated by 180 degrees in the stencil! */ + result[grid.index(i_r, i_theta + (grid.ntheta() / 2))] -= + (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right -> Left */ + + coeff1 * arr * + x[grid.index(i_r, i_theta + (grid.ntheta() / 2))]); /* Center: (Right) -> Center: (Left)*/ + /* + 0.25 * art * x[grid.index(i_r,i_theta+1)]; // Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + /* - 0.25 * art * x[grid.index(i_r,i_theta-1)]; // Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ /* Fill result(i+1,j) */ result[grid.index(i_r + 1, i_theta)] -= (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ @@ -169,81 +114,122 @@ inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, cons result[grid.index(i_r, i_theta - 1)] -= (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ - + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ - /* Fill result(i,j+1) */ - result[grid.index(i_r, i_theta + 1)] -= - (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ - + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ - - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ - /* ------------------------------- */ - /* Node next to the outer boundary */ - /* ------------------------------- */ - } - else if (i_r == grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - /* Fill result(i,j) */ - result[grid.index(i_r, i_theta)] -= - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ - - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ - - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ - - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ - - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * - x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ - /* Fill result(i-1,j) */ - result[grid.index(i_r - 1, i_theta)] -= - (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ - + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ - - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ - + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ - /* Don't give to the outer dirichlet boundary! */ - /* Fill result(i+1,j) */ - /* result[grid.index(i_r+1,i_theta)] -= ( */ - /* - coeff2 * arr * x[grid.index(i_r,i_theta)] // Left */ - /* + coeff2 * arr * x[grid.index(i_r+1,i_theta)] // Center: (Left) */ - /* + 0.25 * art * x[grid.index(i_r,i_theta+1)] // Top Left */ - /* - 0.25 * art * x[grid.index(i_r,i_theta-1)] ); // Bottom Left */ - /* Fill result(i,j-1) */ - result[grid.index(i_r, i_theta - 1)] -= - (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ - + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ - - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ - + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Top Right */ + /* + 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ /* Fill result(i,j+1) */ result[grid.index(i_r, i_theta + 1)] -= (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ - + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ - - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ - /* ----------------------------- */ - /* Node on to the outer boundary */ - /* ----------------------------- */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)]); /* Bottom Right */ + /* - 0.25 * art * x[grid.index(i_r-1,i_theta)]; // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ } - else if (i_r == grid.nr() - 1) { - /* Fill result of (i,j) */ - result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; - /* Give value to the interior nodes! */ - double h1 = grid.radialSpacing(i_r - 1); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - /* Fill result(i-1,j) */ + /* ------------------------------- */ + /* Node next to the inner boundary */ + /* ------------------------------- */ + } + else if (i_r == 1) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * + x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ + /* Fill result(i-1,j) */ + if (!DirBC_Interior) { /* Don't give to the inner dirichlet boundary! */ result[grid.index(i_r - 1, i_theta)] -= (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ } - } while (0); + /* Fill result(i+1,j) */ + result[grid.index(i_r + 1, i_theta)] -= (-coeff2 * arr * x[grid.index(i_r, i_theta)] /* Left */ + + coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Center: (Left) */ + + 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Left */ + - 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ + - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ + /* ------------------------------- */ + /* Node next to the outer boundary */ + /* ------------------------------- */ + } + else if (i_r == grid.nr() - 2) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + /* Fill result(i,j) */ + result[grid.index(i_r, i_theta)] -= + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF) * x[grid.index(i_r, i_theta)] /* beta_{i,j} */ + - coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Left */ + - coeff2 * arr * x[grid.index(i_r + 1, i_theta)] /* Right */ + - coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Bottom */ + - coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Top */ + + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * + x[grid.index(i_r, i_theta)]); /* Center: (Left, Right, Bottom, Top) */ + /* Fill result(i-1,j) */ + result[grid.index(i_r - 1, i_theta)] -= (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + /* Don't give to the outer dirichlet boundary! */ + /* Fill result(i+1,j) */ + /* result[grid.index(i_r+1,i_theta)] -= ( */ + /* - coeff2 * arr * x[grid.index(i_r,i_theta)] // Left */ + /* + coeff2 * arr * x[grid.index(i_r+1,i_theta)] // Center: (Left) */ + /* + 0.25 * art * x[grid.index(i_r,i_theta+1)] // Top Left */ + /* - 0.25 * art * x[grid.index(i_r,i_theta-1)] ); // Bottom Left */ + /* Fill result(i,j-1) */ + result[grid.index(i_r, i_theta - 1)] -= (-coeff3 * att * x[grid.index(i_r, i_theta)] /* Top */ + + coeff3 * att * x[grid.index(i_r, i_theta - 1)] /* Center: (Top) */ + - 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Top Left */ + /* Fill result(i,j+1) */ + result[grid.index(i_r, i_theta + 1)] -= (-coeff4 * att * x[grid.index(i_r, i_theta)] /* Bottom */ + + coeff4 * att * x[grid.index(i_r, i_theta + 1)] /* Center: (Bottom) */ + + 0.25 * art * x[grid.index(i_r + 1, i_theta)] /* Bottom Right */ + - 0.25 * art * x[grid.index(i_r - 1, i_theta)]); /* Bottom Left */ + /* ----------------------------- */ + /* Node on to the outer boundary */ + /* ----------------------------- */ + } + else if (i_r == grid.nr() - 1) { + /* Fill result of (i,j) */ + result[grid.index(i_r, i_theta)] -= x[grid.index(i_r, i_theta)]; + /* Give value to the interior nodes! */ + double h1 = grid.radialSpacing(i_r - 1); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + double coeff1 = 0.5 * (k1 + k2) / h1; + /* Fill result(i-1,j) */ + result[grid.index(i_r - 1, i_theta)] -= (-coeff1 * arr * x[grid.index(i_r, i_theta)] /* Right */ + + coeff1 * arr * x[grid.index(i_r - 1, i_theta)] /* Center: (Right) */ + - 0.25 * art * x[grid.index(i_r, i_theta + 1)] /* Top Right */ + + 0.25 * art * x[grid.index(i_r, i_theta - 1)]); /* Bottom Right */ + } } void ResidualGive::applyCircleSection(const int i_r, Vector result, ConstVector x) const diff --git a/src/Residual/ResidualTake/applyResidualTake.cpp b/src/Residual/ResidualTake/applyResidualTake.cpp index 4fb4afe2..b4443dad 100644 --- a/src/Residual/ResidualTake/applyResidualTake.cpp +++ b/src/Residual/ResidualTake/applyResidualTake.cpp @@ -5,12 +5,67 @@ inline void node_apply_residual_take(int i_r, int i_theta, const PolarGrid& grid ConstVector& arr, ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) { - do { - /* -------------------- */ - /* Node in the interior */ - /* -------------------- */ - if (i_r > 0 && i_r < grid.nr() - 1) { - double h1 = grid.radialSpacing(i_r - 1); + /* -------------------- */ + /* Node in the interior */ + /* -------------------- */ + if (i_r > 0 && i_r < grid.nr() - 1) { + double h1 = grid.radialSpacing(i_r - 1); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int bottom_left = grid.index(i_r - 1, i_theta_M1); + const int left = grid.index(i_r - 1, i_theta); + const int top_left = grid.index(i_r - 1, i_theta_P1); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int bottom_right = grid.index(i_r + 1, i_theta_M1); + const int right = grid.index(i_r + 1, i_theta); + const int top_right = grid.index(i_r + 1, i_theta_P1); + + result[center] = + rhs[center] - + (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) * x[center] /* beta_{i,j} */ + + - coeff1 * (arr[center] + arr[left]) * (x[left] - x[center]) /* Left - Center: (Left) */ + - coeff2 * (arr[center] + arr[right]) * (x[right] - x[center]) /* Right - Center: (Right) */ + - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ + - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ + + - 0.25 * (art[left] + art[bottom]) * x[bottom_left] /* Bottom Left */ + + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ + + 0.25 * (art[left] + art[top]) * x[top_left] /* Top Left */ + - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ + ); + /* -------------------------- */ + /* Node on the inner boundary */ + /* -------------------------- */ + } + else if (i_r == 0) { + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + const int center = grid.index(i_r, i_theta); + result[center] = rhs[center] - x[center]; + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + /* h1 gets replaced with 2 * R0. */ + /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ + /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ + double h1 = 2.0 * grid.radius(0); double h2 = grid.radialSpacing(i_r); double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); @@ -20,12 +75,11 @@ inline void node_apply_residual_take(int i_r, int i_theta, const PolarGrid& grid double coeff3 = 0.5 * (h1 + h2) / k1; double coeff4 = 0.5 * (h1 + h2) / k2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_Across = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - const int bottom_left = grid.index(i_r - 1, i_theta_M1); - const int left = grid.index(i_r - 1, i_theta); - const int top_left = grid.index(i_r - 1, i_theta_P1); + const int left = grid.index(i_r, i_theta_Across); const int bottom = grid.index(i_r, i_theta_M1); const int center = grid.index(i_r, i_theta); const int top = grid.index(i_r, i_theta_P1); @@ -42,79 +96,22 @@ inline void node_apply_residual_take(int i_r, int i_theta, const PolarGrid& grid - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ - - 0.25 * (art[left] + art[bottom]) * x[bottom_left] /* Bottom Left */ + /* - 0.25 * (art[left] + art[bottom]) * x[bottom_left] // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ - + 0.25 * (art[left] + art[top]) * x[top_left] /* Top Left */ + + /* + 0.25 * (art[left] + art[top]) * x[top_left] // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ ); - /* -------------------------- */ - /* Node on the inner boundary */ - /* -------------------------- */ - } - else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - const int center = grid.index(i_r, i_theta); - result[center] = rhs[center] - x[center]; - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - /* h1 gets replaced with 2 * R0. */ - /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ - /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ - double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_Across = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - - const int left = grid.index(i_r, i_theta_Across); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int bottom_right = grid.index(i_r + 1, i_theta_M1); - const int right = grid.index(i_r + 1, i_theta); - const int top_right = grid.index(i_r + 1, i_theta_P1); - - result[center] = - rhs[center] - - (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) * - x[center] /* beta_{i,j} */ - - - coeff1 * (arr[center] + arr[left]) * (x[left] - x[center]) /* Left - Center: (Left) */ - - coeff2 * (arr[center] + arr[right]) * (x[right] - x[center]) /* Right - Center: (Right) */ - - coeff3 * (att[center] + att[bottom]) * (x[bottom] - x[center]) /* Bottom - Center: (Bottom) */ - - coeff4 * (att[center] + att[top]) * (x[top] - x[center]) /* Top - Center: (Top) */ - - /* - 0.25 * (art[left] + art[bottom]) * x[bottom_left] // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ - - /* + 0.25 * (art[left] + art[top]) * x[top_left] // Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ - ); - } - /* ----------------------------- */ - /* Node on to the outer boundary */ - /* ----------------------------- */ } - else if (i_r == grid.nr() - 1) { - /* Fill result of (i,j) */ - const int center = grid.index(i_r, i_theta); - result[center] = rhs[center] - x[center]; - } - } while (0); + /* ----------------------------- */ + /* Node on to the outer boundary */ + /* ----------------------------- */ + } + else if (i_r == grid.nr() - 1) { + /* Fill result of (i,j) */ + const int center = grid.index(i_r, i_theta); + result[center] = rhs[center] - x[center]; + } } void ResidualTake::applyCircleSection(const int i_r, Vector result, ConstVector rhs, From 272f498ec3757215fd62873c7f5da6898605b23c Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Mon, 26 Jan 2026 16:14:30 +0100 Subject: [PATCH 10/10] Only increase inlining for src --- CMakeLists.txt | 3 --- src/CMakeLists.txt | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b00a970..f4200980 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,9 +26,6 @@ endif() # Set compiler flags set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -pedantic -Wno-unused -Wno-psabi -Wfloat-conversion") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -mtune=generic -Wno-psabi") -if (CMAKE_COMPILER_IS_GNUCXX) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} --param max-inline-insns-single=1500") -endif() # Set coverage compiler flags - must come before any targets are defined if(GMGPOLAR_ENABLE_COVERAGE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eb06eece..bec9580a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,11 @@ cmake_minimum_required(VERSION 3.12) +if (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_BUILD_TYPE STREQUAL "Release") + # If using GNU increase maximum number of instructions considered for inlining + # in Release mode in this folder and sub-folders + add_compile_options(--param max-inline-insns-single=1500) +endif() + # Add subdirectories for components add_subdirectory(InputFunctions)