diff --git a/Common/include/CConfig.hpp b/Common/include/CConfig.hpp
index dbe2be4dbdf7..5dc9f58709a1 100644
--- a/Common/include/CConfig.hpp
+++ b/Common/include/CConfig.hpp
@@ -488,7 +488,7 @@ class CConfig {
   unsigned short **DegreeFFDBox;      /*!< \brief Degree of the FFD boxes. */
   string *FFDTag;                     /*!< \brief Parameters of the design variable. */
   string *TagFFDBox;                  /*!< \brief Tag of the FFD box. */
-  unsigned short GeometryMode;        /*!< \brief Gemoetry mode (analysis or gradient computation). */
+  unsigned short GeometryMode;        /*!< \brief Geometry mode (analysis or gradient computation). */
   unsigned short MGCycle;             /*!< \brief Kind of multigrid cycle. */
   unsigned short FinestMesh;          /*!< \brief Finest mesh for the full multigrid approach. */
   unsigned short nFFD_Fix_IDir,
@@ -496,11 +496,20 @@ class CConfig {
   unsigned short nMG_PreSmooth,       /*!< \brief Number of MG pre-smooth parameters found in config file. */
   nMG_PostSmooth,                     /*!< \brief Number of MG post-smooth parameters found in config file. */
   nMG_CorrecSmooth;                   /*!< \brief Number of MG correct-smooth parameters found in config file. */
+  unsigned long MG_Min_MeshSize;      /*!< \brief Minimum mesh size per coarsest level to allow another MG level. */
+
   short *FFD_Fix_IDir,
   *FFD_Fix_JDir, *FFD_Fix_KDir;       /*!< \brief Exact sections. */
   unsigned short *MG_PreSmooth,       /*!< \brief Multigrid Pre smoothing. */
   *MG_PostSmooth,                     /*!< \brief Multigrid Post smoothing. */
   *MG_CorrecSmooth;                   /*!< \brief Multigrid Jacobi implicit smoothing of the correction. */
+  bool MG_Smooth_EarlyExit; /*!< \brief Enable early exit for MG smoothing. */
+  su2double MG_Smooth_Res_Threshold; /*!< \brief Residual reduction threshold for early exit. */
+  bool MG_Smooth_Output; /*!< \brief Output per-iteration multigrid smoothing info. */
+  bool MG_Implicit_Lines; /*!< \\brief Enable implicit-lines agglomeration from walls. */
+  bool MG_Implicit_Debug; /*!< \brief Enable debug output for implicit-lines agglomeration. */
+  bool MG_DebugHaloCoordinates; /*!< \brief Enable halo CV coordinate validation for multigrid. */
+  su2double MG_Smooth_Coeff; /*!< \brief Smoothing coefficient for multigrid correction smoothing. */
   su2double *LocationStations;        /*!< \brief Airfoil sections in wing slicing subroutine. */
 
   ENUM_MULTIZONE Kind_MZSolver;    /*!< \brief Kind of multizone solver.  */
@@ -2911,7 +2920,7 @@ class CConfig {
   unsigned short GetFinestMesh(void) const { return FinestMesh; }
 
   /*!
-   * \brief Get the kind of multigrid (V or W).
+   * \brief Get the kind of multigrid (V, W or FULLMG).
    * \note This variable is used in a recursive way to perform the different kind of cycles
    * \return 0 or 1 depending of we are dealing with a V or W cycle.
    */
@@ -3054,7 +3063,28 @@ class CConfig {
    * \brief Get the number of Runge-Kutta steps.
    * \return Number of Runge-Kutta steps.
    */
-  unsigned short GetnRKStep(void) const { return nRKStep; }
+ unsigned short GetnRKStep(void) const {
+
+    unsigned short iRKLimit = 1;
+
+    switch (GetKind_TimeIntScheme()) {
+      case RUNGE_KUTTA_EXPLICIT:
+        iRKLimit = GetnRKStep();
+        break;
+      case CLASSICAL_RK4_EXPLICIT:
+        iRKLimit = 4;
+        break;
+      case EULER_EXPLICIT:
+      case EULER_IMPLICIT:
+        iRKLimit = 1;
+        break;
+      default:
+        iRKLimit = 1;
+        break;
+    }
+  //return nRKStep;
+  return iRKLimit;
+}
 
   /*!
    * \brief Get the number of time levels for time accurate local time stepping.
@@ -3839,6 +3869,55 @@ class CConfig {
     return MG_CorrecSmooth[val_mesh];
   }
 
+  /*!
+   * \brief Get whether early exit is enabled for MG smoothing.
+   * \return True if early exit is enabled.
+   */
+  bool GetMG_Smooth_EarlyExit() const { return MG_Smooth_EarlyExit; }
+
+  /*!
+   * \brief Get the residual threshold for early exit in MG smoothing.
+   * \return Residual threshold.
+   */
+  su2double GetMG_Smooth_Res_Threshold() const { return MG_Smooth_Res_Threshold; }
+
+  /*!
+   * \brief Get whether per-iteration output is enabled for MG smoothing.
+   * \return True if output is enabled.
+   */
+  bool GetMG_Smooth_Output() const { return MG_Smooth_Output; }
+
+  /*!\
+   * \brief Get whether implicit-lines agglomeration is enabled.
+   * \return True if implicit-lines agglomeration is enabled.
+   */
+  bool GetMG_Implicit_Lines() const { return MG_Implicit_Lines; }
+
+  /*!\
+   * \brief Get whether implicit-lines debug output is enabled.
+   * \return True if implicit-lines debug output is enabled.
+   */
+  bool GetMG_Implicit_Debug() const { return MG_Implicit_Debug; }
+
+  /*!\
+   * \brief Get whether halo CV coordinate validation is enabled for multigrid.
+   * \return True if halo coordinate validation is enabled.
+   */
+  bool GetMG_DebugHaloCoordinates() const { return MG_DebugHaloCoordinates; }
+
+  /*!\
+   * \brief Get the minimum mesh size threshold used to compute effective MG levels.
+   * \return Minimum mesh size per coarsest level.
+   */
+  unsigned long GetMG_Min_MeshSize() const { return MG_Min_MeshSize; }
+
+
+  /*!
+   * \brief Get the smoothing coefficient for MG correction smoothing.
+   * \return Smoothing coefficient.
+   */
+  su2double GetMG_Smooth_Coeff() const { return MG_Smooth_Coeff; }
+
   /*!
    * \brief plane of the FFD (I axis) that should be fixed.
    * \param[in] val_index - Index of the arrray with all the planes in the I direction that should be fixed.
@@ -6791,6 +6870,9 @@ class CConfig {
    */
   su2double GetDamp_Res_Restric(void) const { return Damp_Res_Restric; }
 
+  /*!\n+   * \brief Set the damping factor for the residual restriction.\n+   * \param[in] val New value for the damping factor.\n+   */
+  void SetDamp_Res_Restric(su2double val) { Damp_Res_Restric = val; }
+
   /*!
    * \brief Value of the damping factor for the correction prolongation.
    * \return Value of the damping factor.
diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp
index 975e1b78cc19..d926f5e0d8d5 100644
--- a/Common/include/geometry/CGeometry.hpp
+++ b/Common/include/geometry/CGeometry.hpp
@@ -246,6 +246,7 @@ class CGeometry {
 
   /*!< \brief Bool if boundary-marker is straight(2D)/plane(3D) for each local marker. */
   vector<bool> boundIsStraight;
+
   vector<su2double> SurfaceAreaCfgFile; /*!< \brief Total Surface area for all markers. */
 
   /*--- Partitioning-specific variables ---*/
diff --git a/Common/include/geometry/CMultiGridGeometry.hpp b/Common/include/geometry/CMultiGridGeometry.hpp
index 5765bab0e07d..b46e6b183572 100644
--- a/Common/include/geometry/CMultiGridGeometry.hpp
+++ b/Common/include/geometry/CMultiGridGeometry.hpp
@@ -28,28 +28,29 @@
 #pragma once
 
 #include "CGeometry.hpp"
+class CMultiGridQueue;
 
 /*!
  * \class CMultiGridGeometry
- * \brief Class for defining the multigrid geometry, the main delicated part is the
+ * \brief Class for defining the multigrid geometry, the main delegated part is the
  *        agglomeration stage, which is done in the declaration.
  * \author F. Palacios
  */
 class CMultiGridGeometry final : public CGeometry {
  private:
   /*!
-   * \brief Determine if a CVPoint van be agglomerated, if it have the same marker point as the seed.
+   * \brief Determine if a CVPoint can be agglomerated, if it has the same marker point as the seed.
    * \param[in] CVPoint - Control volume to be agglomerated.
    * \param[in] marker_seed - Marker of the seed.
    * \param[in] fine_grid - Geometrical definition of the problem.
    * \param[in] config - Definition of the particular problem.
    * \return <code>TRUE</code> or <code>FALSE</code> depending if the control volume can be agglomerated.
    */
-  bool SetBoundAgglomeration(unsigned long CVPoint, short marker_seed, const CGeometry* fine_grid,
+  bool SetBoundAgglomeration(unsigned long CVPoint, vector<short> marker_seed, const CGeometry* fine_grid,
                              const CConfig* config) const;
 
   /*!
-   * \brief Determine if a can be agglomerated using geometrical criteria.
+   * \brief Determine if a Point can be agglomerated using geometrical criteria.
    * \param[in] iPoint - Seed point.
    * \param[in] fine_grid - Geometrical definition of the problem.
    * \param[in] config - Definition of the particular problem.
@@ -57,7 +58,7 @@ class CMultiGridGeometry final : public CGeometry {
   bool GeometricalCheck(unsigned long iPoint, const CGeometry* fine_grid, const CConfig* config) const;
 
   /*!
-   * \brief Determine if a CVPoint van be agglomerated, if it have the same marker point as the seed.
+   * \brief Determine if a CVPoint can be agglomerated, if it has the same marker point as the seed.
    * \param[out] Suitable_Indirect_Neighbors - List of Indirect Neighbours that can be agglomerated.
    * \param[in] iPoint - Seed point.
    * \param[in] Index_CoarseCV - Index of agglomerated point.
@@ -66,6 +67,32 @@ class CMultiGridGeometry final : public CGeometry {
   void SetSuitableNeighbors(vector<unsigned long>& Suitable_Indirect_Neighbors, unsigned long iPoint,
                             unsigned long Index_CoarseCV, const CGeometry* fine_grid) const;
 
+  /*!
+   * \brief Agglomerate high-aspect ratio interior cells along implicit lines starting from wall vertices.
+   * \param[in,out] Index_CoarseCV - Reference to the current coarse control volume index that will be
+   *                                  incremented as new coarse CVs are created.
+   * \param[in] fine_grid - Geometrical definition of the problem (fine grid).
+   * \param[in] config - Definition of the particular problem.
+   * \param[in,out] MGQueue_InnerCV - Queue used for STEP 2; agglomerated points will be removed from it.
+   */
+  void AgglomerateImplicitLines(unsigned long& Index_CoarseCV, const CGeometry* fine_grid, const CConfig* config,
+                                CMultiGridQueue& MGQueue_InnerCV);
+
+  /*!
+   * \brief Compute surface straightness for multigrid geometry.
+   * \param[in] config - Definition of the particular problem.
+   */
+  void ComputeSurfStraightness(CConfig* config);
+
+  /*!
+   * \brief Compute local curvature at a boundary vertex on Euler wall.
+   * \param[in] fine_grid - Fine grid geometry.
+   * \param[in] iPoint - Point index.
+   * \param[in] iMarker - Marker index.
+   * \return Maximum angle (in degrees) between this vertex normal and adjacent vertex normals.
+   */
+  su2double ComputeLocalCurvature(const CGeometry* fine_grid, unsigned long iPoint, unsigned short iMarker) const;
+
  public:
   /*--- This is to suppress Woverloaded-virtual, omitting it has no negative impact. ---*/
   using CGeometry::SetBoundControlVolume;
@@ -154,4 +181,11 @@ class CMultiGridGeometry final : public CGeometry {
    * \param[in] val_marker - Index of the boundary marker.
    */
   void SetMultiGridWallTemperature(const CGeometry* fine_grid, unsigned short val_marker) override;
+
+  /*!
+   * \brief Validate that halo CV coordinates match corresponding domain CVs on remote ranks (debug feature).
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] iMesh - Multigrid level for reporting.
+   */
+  void ValidateHaloCoordinates(const CConfig* config, unsigned short iMesh) const;
 };
diff --git a/Common/include/geometry/CMultiGridQueue.hpp b/Common/include/geometry/CMultiGridQueue.hpp
index 6cb421cdcd31..3be217b540ca 100644
--- a/Common/include/geometry/CMultiGridQueue.hpp
+++ b/Common/include/geometry/CMultiGridQueue.hpp
@@ -93,7 +93,7 @@ class CMultiGridQueue {
   void IncrPriorityCV(unsigned long incrPoint);
 
   /*!
-   * \brief Increase the priority of the CV.
+   * \brief Reduce the priority of the CV.
    * \param[in] redPoint - Index of the control volume.
    */
   void RedPriorityCV(unsigned long redPoint);
diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
index 4dd41a6c9cf5..cb71db991e9a 100644
--- a/Common/src/CConfig.cpp
+++ b/Common/src/CConfig.cpp
@@ -1982,6 +1982,31 @@ void CConfig::SetConfig_Options() {
   /*!\brief MG_DAMP_PROLONGATION\n DESCRIPTION: Damping factor for the correction prolongation. DEFAULT 0.75 \ingroup Config*/
   addDoubleOption("MG_DAMP_PROLONGATION", Damp_Correc_Prolong, 0.75);
 
+  /*!\brief MG_SMOOTH_EARLY_EXIT\n DESCRIPTION: Enable early exit for MG smoothing iterations based on RMS residual. DEFAULT: NO \ingroup Config*/
+  addBoolOption("MG_SMOOTH_EARLY_EXIT", MG_Smooth_EarlyExit, false);
+  /*!\brief MG_SMOOTH_RES_THRESHOLD\n DESCRIPTION: RMS residual threshold for early exit in MG smoothing. DEFAULT: 1e-2 \ingroup Config*/
+  addDoubleOption("MG_SMOOTH_RES_THRESHOLD", MG_Smooth_Res_Threshold, 1e-2);
+
+  /*!\brief MG_SMOOTH_OUTPUT\n DESCRIPTION: Output per-iteration RMS for MG smoothing. DEFAULT: NO \ingroup Config*/
+  addBoolOption("MG_SMOOTH_OUTPUT", MG_Smooth_Output, false);
+
+  /*!\brief MG_IMPLICIT_LINES\n DESCRIPTION: Enable agglomeration along implicit lines from wall seeds. DEFAULT: NO \ingroup Config*/
+  addBoolOption("MG_IMPLICIT_LINES", MG_Implicit_Lines, false);
+
+  /*!\brief MG_IMPLICIT_DEBUG\n DESCRIPTION: Enable debug output for implicit-lines agglomeration. DEFAULT: NO \ingroup Config*/
+  addBoolOption("MG_IMPLICIT_DEBUG", MG_Implicit_Debug, false);
+
+  /*!\brief MG_DEBUG_HALO_COORDINATES\n DESCRIPTION: Enable halo CV coordinate validation for multigrid (expensive MPI check). DEFAULT: NO \ingroup Config*/
+  addBoolOption("MG_DEBUG_HALO_COORDINATES", MG_DebugHaloCoordinates, false);
+
+  /*!\brief MG_MIN_MESHSIZE
+   \ DESCRIPTION: Minimum global mesh size (points) to allow another multigrid level. DEFAULT: 1000 \ingroup Config*/
+  addUnsignedLongOption("MG_MIN_MESHSIZE", MG_Min_MeshSize, 1000);
+
+
+  /*!\brief MG_SMOOTH_COEFF\n DESCRIPTION: Smoothing coefficient for MG correction smoothing. DEFAULT: 1.25 \ingroup Config*/
+  addDoubleOption("MG_SMOOTH_COEFF", MG_Smooth_Coeff, 1.25);
+
   /*!\par CONFIG_CATEGORY: Spatial Discretization \ingroup Config*/
   /*--- Options related to the spatial discretization ---*/
 
@@ -3575,7 +3600,7 @@ void CConfig::SetPostprocessing(SU2_COMPONENT val_software, unsigned short val_i
   }
 
   /*--- Check if MULTIGRID is requested in VOLUME_OUTPUT and set the config boolean accordingly. ---*/
-  Wrt_MultiGrid = false;
+  Wrt_MultiGrid = true;
   for (unsigned short iField = 0; iField < nVolumeOutput; iField++) {
     if(VolumeOutput[iField].find("MULTIGRID") != string::npos) {
       Wrt_MultiGrid = true;
diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp
index 3c611456c04f..58a35af774f8 100644
--- a/Common/src/geometry/CGeometry.cpp
+++ b/Common/src/geometry/CGeometry.cpp
@@ -2472,8 +2472,7 @@ void CGeometry::ComputeModifiedSymmetryNormals(const CConfig* config) {
    * All nodes that are shared by multiple symmetries have to get a corrected normal. */
 
   /*--- Compute if markers are straight lines or planes. ---*/
-  ComputeSurfStraightness(config, false);
-
+  ComputeSurfStraightness(config, true);
   symmetryNormals.clear();
   symmetryNormals.resize(nMarker);
   std::vector<unsigned short> symMarkers, curvedSymMarkers;
@@ -2589,19 +2588,18 @@ void CGeometry::ComputeSurfStraightness(const CConfig* config, bool print_on_scr
   constexpr passivedouble epsilon = 1.0e-6;
   su2double Area;
   string Local_TagBound, Global_TagBound;
-
-  vector<su2double> Normal(nDim), UnitNormal(nDim), RefUnitNormal(nDim);
-
-  /*--- Assume now that this boundary marker is straight. As soon as one
-        AreaElement is found that is not aligend with a Reference then it is
-        certain that the boundary marker is not straight and one can stop
-        searching. Another possibility is that this process doesn't own
-        any nodes of that boundary, in that case we also have to assume the
-        boundary is straight.
-        Any boundary type other than SYMMETRY_PLANE or EULER_WALL gets
-        the value false (or see cases specified in the conditional below)
-        which could be wrong. ---*/
+  cout << "Computing surface straightness for symmetry and Euler wall boundary markers..." << endl;
+
+  vector<su2double> Normal(nDim), UnitNormal(nDim), RefUnitNormal(nDim); /*--- Assume now that this boundary marker is
+       straight. As soon as one AreaElement is found that is not aligned with a Reference then it is certain that the
+       boundary marker is not straight and one can stop searching. Another possibility is that this process doesn't own
+       any nodes of that boundary, in that case we also have to assume the
+       boundary is straight.
+       Any boundary type other than SYMMETRY_PLANE or EULER_WALL gets
+       the value false (or see cases specified in the conditional below)
+       which could be wrong. ---*/
   boundIsStraight.resize(nMarker);
+  cout << "boundisstraight size = " << boundIsStraight.size() << endl;
   fill(boundIsStraight.begin(), boundIsStraight.end(), true);
 
   /*--- Loop over all local markers ---*/
@@ -2685,7 +2683,7 @@ void CGeometry::ComputeSurfStraightness(const CConfig* config, bool print_on_scr
     /*--- Product of type <int>(bool) is equivalnt to a 'logical and' ---*/
     SU2_MPI::Allreduce(Buff_Send_isStraight.data(), Buff_Recv_isStraight.data(), nMarker_Global, MPI_INT, MPI_PROD,
                        SU2_MPI::GetComm());
-
+    cout << "global markers = " << nMarker_Global << endl;
     /*--- Print results on screen. ---*/
     if (rank == MASTER_NODE) {
       for (iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) {
@@ -3902,11 +3900,13 @@ void CGeometry::ColorMGLevels(unsigned short nMGLevels, const CGeometry* const*
     for (auto step = 0u; step < iMesh; ++step) {
       auto coarseMesh = geometry[iMesh - 1 - step];
       if (step)
-        for (auto iPoint = 0ul; iPoint < coarseMesh->GetnPoint(); ++iPoint)
+        for (auto iPoint = 0ul; iPoint < coarseMesh->GetnPoint(); ++iPoint) {
           CoarseGridColor_(iPoint, step) = CoarseGridColor_(coarseMesh->nodes->GetParent_CV(iPoint), step - 1);
+        }
       else
-        for (auto iPoint = 0ul; iPoint < coarseMesh->GetnPoint(); ++iPoint)
+        for (auto iPoint = 0ul; iPoint < coarseMesh->GetnPoint(); ++iPoint) {
           CoarseGridColor_(iPoint, step) = color[coarseMesh->nodes->GetParent_CV(iPoint)];
+        }
     }
   }
 }
diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp
index c485b3c44744..455f169bf119 100644
--- a/Common/src/geometry/CMultiGridGeometry.cpp
+++ b/Common/src/geometry/CMultiGridGeometry.cpp
@@ -29,20 +29,36 @@
 #include "../../include/geometry/CMultiGridQueue.hpp"
 #include "../../include/toolboxes/printing_toolbox.hpp"
 #include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
-
+#include <cmath>
+#include <unordered_map>
+#include <climits>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <map>
+
+/*--- Nijso says: this could perhaps be replaced by metis partitioning? ---*/
 CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, unsigned short iMesh) : CGeometry() {
   nDim = fine_grid->GetnDim();  // Write the number of dimensions of the coarse grid.
+  /*--- Maximum agglomeration size in 2D is 4 nodes, in 3D is 8 nodes. ---*/
+  const short int maxAgglomSize = 4;
+
+  /*--- Inherit boundary properties from fine grid ---*/
+  boundIsStraight = fine_grid->boundIsStraight;
 
-  /*--- Create a queue system to do the agglomeration
+  /*--- Agglomeration Scheme II (Nishikawa, Diskin, Thomas)
+        Create a queue system to do the agglomeration
    1st) More than two markers ---> Vertices (never agglomerate)
    2nd) Two markers ---> Edges (agglomerate if same BC, never agglomerate if different BC)
    3rd) One marker ---> Surface (always agglomerate)
    4th) No marker ---> Internal Volume (always agglomerate) ---*/
 
+   //note that for MPI, we introduce interfaces and we can choose to have agglomeration over
+   //the interface or not. Nishikawa chooses not to agglomerate over interfaces.
+
   /*--- Set a marker to indicate indirect agglomeration, for quads and hexs,
    i.e. consider up to neighbors of neighbors of neighbors.
    For other levels this information is propagated down during their construction. ---*/
-
   if (iMesh == MESH_1) {
     for (auto iPoint = 0ul; iPoint < fine_grid->GetnPoint(); iPoint++)
       fine_grid->nodes->SetAgglomerate_Indirect(iPoint, false);
@@ -59,7 +75,6 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
   }
 
   /*--- Create the coarse grid structure using as baseline the fine grid ---*/
-
   CMultiGridQueue MGQueue_InnerCV(fine_grid->GetnPoint());
   vector<unsigned long> Suitable_Indirect_Neighbors;
 
@@ -67,15 +82,27 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
 
   unsigned long Index_CoarseCV = 0;
 
-  /*--- The first step is the boundary agglomeration. ---*/
+  /*--- Statistics for Euler wall agglomeration ---*/
+  map<unsigned short, unsigned long> euler_wall_agglomerated, euler_wall_rejected_curvature,
+      euler_wall_rejected_straight;
+  for (unsigned short iMarker = 0; iMarker < fine_grid->GetnMarker(); iMarker++) {
+    if (config->GetMarker_All_KindBC(iMarker) == EULER_WALL) {
+      euler_wall_agglomerated[iMarker] = 0;
+      euler_wall_rejected_curvature[iMarker] = 0;
+      euler_wall_rejected_straight[iMarker] = 0;
+    }
+  }
 
+  /*--- STEP 1: The first step is the boundary agglomeration. ---*/
   for (auto iMarker = 0u; iMarker < fine_grid->GetnMarker(); iMarker++) {
+    cout << "marker name = " << config->GetMarker_All_TagBound(iMarker) << endl;
     for (auto iVertex = 0ul; iVertex < fine_grid->GetnVertex(iMarker); iVertex++) {
       const auto iPoint = fine_grid->vertex[iMarker][iVertex]->GetNode();
 
       /*--- If the element has not been previously agglomerated and it
-       belongs to this physical domain, and it meets the geometrical
-       criteria, the agglomeration is studied. ---*/
+      belongs to this physical domain, and it meets the geometrical
+      criteria, the agglomeration is studied. ---*/
+      vector<short> marker_seed;
 
       if ((!fine_grid->nodes->GetAgglomerate(iPoint)) && (fine_grid->nodes->GetDomain(iPoint)) &&
           (GeometricalCheck(iPoint, fine_grid, config))) {
@@ -89,58 +116,131 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
         /*--- We add the seed point (child) to the parent control volume ---*/
 
         nodes->SetChildren_CV(Index_CoarseCV, 0, iPoint);
-        bool agglomerate_seed = true;
+        bool agglomerate_seed = false;
         auto counter = 0;
         unsigned short copy_marker[3] = {};
-        const auto marker_seed = iMarker;
+        marker_seed.push_back(iMarker);
 
         /*--- For a particular point in the fine grid we save all the markers
          that are in that point ---*/
 
-        for (auto jMarker = 0u; jMarker < fine_grid->GetnMarker() && counter < 3; jMarker++) {
+        for (auto jMarker = 0u; jMarker < fine_grid->GetnMarker(); jMarker++) {
+          const string Marker_Tag = config->GetMarker_All_TagBound(iMarker);  // fine_grid->GetMarker_Tag(jMarker);
           if (fine_grid->nodes->GetVertex(iPoint, jMarker) != -1) {
             copy_marker[counter] = jMarker;
             counter++;
+
+            if (jMarker != iMarker) {
+              marker_seed.push_back(jMarker);
+            }
           }
         }
 
-        /*--- To aglomerate a vertex it must have only one physical bc!!
-         This can be improved. If there is only a marker, it is a good
+        /*--- To agglomerate a vertex it must have only one physical bc!!
+         This can be improved. If there is only one marker, it is a good
          candidate for agglomeration ---*/
 
+        /*--- 1 BC, so either an edge in 2D or the interior of a plane in 3D ---*/
+        /*--- Valley -> Valley : conditionally allowed when both points are on the same marker. ---*/
+        /*--- ! Note that in the case of MPI SEND_RECEIVE markers, we might need other conditions ---*/
         if (counter == 1) {
+          cout << "we have exactly one marker at point " << iPoint << endl;
+          cout << " marker is " << marker_seed[0]
+               << ", marker name = " << config->GetMarker_All_TagBound(marker_seed[0]);
+          cout << ", marker type = " << config->GetMarker_All_KindBC(marker_seed[0]) << endl;
+          // The seed/parent is one valley, so we set this part to true
+          // if the child is only on this same valley, we set it to true as well.
           agglomerate_seed = true;
-
-          /*--- Euler walls can be curved and agglomerating them leads to difficulties ---*/
-          if (config->GetMarker_All_KindBC(marker_seed) == EULER_WALL) agglomerate_seed = false;
+          /*--- Euler walls: check curvature-based agglomeration criterion ---*/
+          if (config->GetMarker_All_KindBC(marker_seed[0]) == EULER_WALL) {
+            /*--- Allow agglomeration if marker is straight OR local curvature is small ---*/
+            if (!boundIsStraight[marker_seed[0]]) {
+              /*--- Compute local curvature at this point ---*/
+              su2double local_curvature = ComputeLocalCurvature(fine_grid, iPoint, marker_seed[0]);
+              // limit to 30 degrees
+              if (local_curvature >= 30.0) {
+                agglomerate_seed = false;  // High curvature: do not agglomerate
+                euler_wall_rejected_curvature[marker_seed[0]]++;
+              } else {
+                euler_wall_agglomerated[marker_seed[0]]++;
+              }
+            } else {
+              /*--- Straight wall: agglomerate ---*/
+              euler_wall_agglomerated[marker_seed[0]]++;
+            }
+          }
+          /*--- Note that if the marker is a SEND_RECEIVE, then the node is actually an interior point.
+                In that case it can only be agglomerated with another interior point. ---*/
+          /*--- Temporarily don't agglomerate SEND_RECEIVE markers---*/
+          if (config->GetMarker_All_KindBC(marker_seed[0]) == SEND_RECEIVE) {
+            agglomerate_seed = false;
+          }
         }
+
         /*--- If there are two markers, we will agglomerate if any of the
          markers is SEND_RECEIVE ---*/
 
+        /*--- Note that in 2D, this is a corner and we do not agglomerate. ---*/
+        /*--- In 3D, we agglomerate if the 2 markers are the same. ---*/
         if (counter == 2) {
-          agglomerate_seed = (config->GetMarker_All_KindBC(copy_marker[0]) == SEND_RECEIVE) ||
-                             (config->GetMarker_All_KindBC(copy_marker[1]) == SEND_RECEIVE);
 
-          /* --- Euler walls can also not be agglomerated when the point has 2 markers ---*/
-          if ((config->GetMarker_All_KindBC(copy_marker[0]) == EULER_WALL) ||
-              (config->GetMarker_All_KindBC(copy_marker[1]) == EULER_WALL)) {
-            agglomerate_seed = false;
+          /*--- Only agglomerate if one of the 2 markers are MPI markers. ---*/
+          //agglomerate_seed = (config->GetMarker_All_KindBC(copy_marker[0]) == SEND_RECEIVE) ||
+          //                   (config->GetMarker_All_KindBC(copy_marker[1]) == SEND_RECEIVE);
+          /*--- Do not agglomerate if one of the 2 markers are MPI markers. ---*/
+          agglomerate_seed = (config->GetMarker_All_KindBC(copy_marker[0]) != SEND_RECEIVE) &&
+                             (config->GetMarker_All_KindBC(copy_marker[1]) != SEND_RECEIVE);
+
+          /*--- Euler walls: check curvature-based agglomeration criterion for both markers ---*/
+          bool euler_wall_rejected_here = false;
+          for (unsigned short i = 0; i < 2; i++) {
+            if (config->GetMarker_All_KindBC(copy_marker[i]) == EULER_WALL) {
+              if (!boundIsStraight[copy_marker[i]]) {
+                /*--- Compute local curvature at this point ---*/
+                su2double local_curvature = ComputeLocalCurvature(fine_grid, iPoint, copy_marker[i]);
+                // limit to 30 degrees
+                if (local_curvature >= 30.0) {
+                  agglomerate_seed = false;  // High curvature: do not agglomerate
+                  euler_wall_rejected_curvature[copy_marker[i]]++;
+                  euler_wall_rejected_here = true;
+                }
+              }
+              /*--- Track agglomeration if not rejected ---*/
+              if (agglomerate_seed && !euler_wall_rejected_here) {
+                euler_wall_agglomerated[copy_marker[i]]++;
+              }
+            }
           }
+
+          /*--- In 2D, corners are not agglomerated, but in 3D counter=2 means we are on the
+                edge of a 2D face. In that case, agglomerate if both nodes are the same. ---*/
+          // if (nDim == 2) agglomerate_seed = false;
         }
 
         /*--- If there are more than 2 markers, the aglomeration will be discarded ---*/
 
         if (counter > 2) agglomerate_seed = false;
+        // note that if one of the markers is SEND_RECEIVE, then we could allow agglomeration since
+        // the real number of markers is then 2.
 
-        /*--- If the seed can be agglomerated, we try to agglomerate more points ---*/
-
+        /*--- If the seed (parent) can be agglomerated, we try to agglomerate connected childs to the parent ---*/
+        /*--- Note that in 2D we allow a maximum of 4 nodes to be agglomerated ---*/
         if (agglomerate_seed) {
+          // cout << "    seed can be agglomerated to more points." << endl;
           /*--- Now we do a sweep over all the nodes that surround the seed point ---*/
 
           for (auto CVPoint : fine_grid->nodes->GetPoints(iPoint)) {
-            /*--- The new point can be agglomerated ---*/
+            // cout << "    checking child CVPoint = "
+            //      << CVPoint
+            //      << ", coord = "
+            //      << fine_grid->nodes->GetCoord(CVPoint, 0)
+            //      << " "
+            //      << fine_grid->nodes->GetCoord(CVPoint, 1)
+            //      << endl;
 
+            /*--- The new point can be agglomerated ---*/
             if (SetBoundAgglomeration(CVPoint, marker_seed, fine_grid, config)) {
+              // cout << "    agglomerate " << CVPoint << " with seed point "<< iPoint << endl;
               /*--- We set the value of the parent ---*/
 
               fine_grid->nodes->SetParent_CV(CVPoint, Index_CoarseCV);
@@ -149,38 +249,53 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
 
               nodes->SetChildren_CV(Index_CoarseCV, nChildren, CVPoint);
               nChildren++;
+              /*--- In 2D, we agglomerate exactly 2 nodes if the nodes are on the line edge. ---*/
+              if ((nDim == 2) && (counter == 1)) break;
+              /*--- In 3D, we agglomerate exactly 2 nodes if the nodes are on the surface edge. ---*/
+              if ((nDim == 3) && (counter == 2)) break;
+              /*--- Apply maxAgglomSize limit for 3D internal boundary face nodes (counter==1 in 3D). ---*/
+              if (nChildren == maxAgglomSize) break;
             }
           }
 
-          Suitable_Indirect_Neighbors.clear();
-
-          if (fine_grid->nodes->GetAgglomerate_Indirect(iPoint))
-            SetSuitableNeighbors(Suitable_Indirect_Neighbors, iPoint, Index_CoarseCV, fine_grid);
-
-          /*--- Now we do a sweep over all the indirect nodes that can be added ---*/
-
-          for (auto CVPoint : Suitable_Indirect_Neighbors) {
-            /*--- The new point can be agglomerated ---*/
-
-            if (SetBoundAgglomeration(CVPoint, marker_seed, fine_grid, config)) {
-              /*--- We set the value of the parent ---*/
-
-              fine_grid->nodes->SetParent_CV(CVPoint, Index_CoarseCV);
-
-              /*--- We set the indirect agglomeration information of the corse point
-               based on its children in the fine grid. ---*/
-
-              if (fine_grid->nodes->GetAgglomerate_Indirect(CVPoint))
-                nodes->SetAgglomerate_Indirect(Index_CoarseCV, true);
-
-              /*--- We set the value of the child ---*/
-
-              nodes->SetChildren_CV(Index_CoarseCV, nChildren, CVPoint);
-              nChildren++;
+          /*--- Only take into account indirect neighbors for 3D faces, not 2D. ---*/
+          if (nDim == 3) {
+            Suitable_Indirect_Neighbors.clear();
+
+            if (fine_grid->nodes->GetAgglomerate_Indirect(iPoint))
+              SetSuitableNeighbors(Suitable_Indirect_Neighbors, iPoint, Index_CoarseCV, fine_grid);
+
+            /*--- Now we do a sweep over all the indirect nodes that can be added ---*/
+            for (auto CVPoint : Suitable_Indirect_Neighbors) {
+              // cout << "         Boundary: checking indirect neighbors " << CVPoint
+              // << ", coord = "
+              //     << fine_grid->nodes->GetCoord(CVPoint, 0)
+              //     << " "
+              //     << fine_grid->nodes->GetCoord(CVPoint, 1)
+              // << endl;
+              /*--- The new point can be agglomerated ---*/
+              if (SetBoundAgglomeration(CVPoint, marker_seed, fine_grid, config)) {
+                // cout << "         Boundary: indirect neighbor " << CVPoint << " can be agglomerated." << endl;
+                /*--- We set the value of the parent ---*/
+                fine_grid->nodes->SetParent_CV(CVPoint, Index_CoarseCV);
+
+                /*--- We set the indirect agglomeration information of the corse point
+                based on its children in the fine grid. ---*/
+                if (fine_grid->nodes->GetAgglomerate_Indirect(CVPoint))
+                  nodes->SetAgglomerate_Indirect(Index_CoarseCV, true);
+
+                /*--- We set the value of the child ---*/
+                nodes->SetChildren_CV(Index_CoarseCV, nChildren, CVPoint);
+                nChildren++;
+                /*--- Apply maxAgglomSize limit for 3D internal boundary face nodes. ---*/
+                if (nChildren == maxAgglomSize) break;
+              }
             }
           }
         }
 
+        /*--- At this stage we can check if the node is an isolated node. ---*/
+
         /*--- Update the number of children of the coarse control volume. ---*/
 
         nodes->SetnChildren_CV(Index_CoarseCV, nChildren);
@@ -195,8 +310,10 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
   for (auto iMarker = 0u; iMarker < fine_grid->GetnMarker(); iMarker++) {
     for (auto iVertex = 0ul; iVertex < fine_grid->GetnVertex(iMarker); iVertex++) {
       const auto iPoint = fine_grid->vertex[iMarker][iVertex]->GetNode();
-
+      // cout << "point " << iPoint << ", parent = " << fine_grid->nodes->GetParent_CV(iPoint)
+      //     << " " << fine_grid->nodes->GetAgglomerate(iPoint) << endl;
       if ((!fine_grid->nodes->GetAgglomerate(iPoint)) && (fine_grid->nodes->GetDomain(iPoint))) {
+        // cout << "       Boundary:mark left-over nodes " << endl;
         fine_grid->nodes->SetParent_CV(iPoint, Index_CoarseCV);
         nodes->SetChildren_CV(Index_CoarseCV, 0, iPoint);
         nodes->SetnChildren_CV(Index_CoarseCV, 1);
@@ -223,8 +340,13 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
     }
   }
 
-  /*--- Agglomerate the domain points. ---*/
+  /*--- Agglomerate high-aspect-ratio interior nodes along implicit lines from walls. ---*/
+  if (config->GetMG_Implicit_Lines()) {
+    AgglomerateImplicitLines(Index_CoarseCV, fine_grid, config, MGQueue_InnerCV);
+  }
 
+  /*--- STEP 2: Agglomerate the domain points. ---*/
+  // cout << "*********** STEP 2 ***" << endl;
   auto iteration = 0ul;
   while (!MGQueue_InnerCV.EmptyQueue() && (iteration < fine_grid->GetnPoint())) {
     const auto iPoint = MGQueue_InnerCV.NextCV();
@@ -236,6 +358,8 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
     if ((!fine_grid->nodes->GetAgglomerate(iPoint)) && (fine_grid->nodes->GetDomain(iPoint)) &&
         (GeometricalCheck(iPoint, fine_grid, config))) {
       unsigned short nChildren = 1;
+      // cout << "***** internal seed point " << iPoint << ", coord = " << fine_grid->nodes->GetCoord(iPoint, 0) << " "
+      // << fine_grid->nodes->GetCoord(iPoint, 1) << endl;
 
       /*--- We set an index for the parent control volume ---*/
 
@@ -258,7 +382,7 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
         if ((!fine_grid->nodes->GetAgglomerate(CVPoint)) && (fine_grid->nodes->GetDomain(CVPoint)) &&
             (GeometricalCheck(CVPoint, fine_grid, config))) {
           /*--- We set the value of the parent ---*/
-
+          // cout << "agglomerate " << CVPoint << " to internal seed point " << iPoint << endl;
           fine_grid->nodes->SetParent_CV(CVPoint, Index_CoarseCV);
 
           /*--- We set the value of the child ---*/
@@ -271,6 +395,7 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
 
           MGQueue_InnerCV.Update(CVPoint, fine_grid);
         }
+        if (nChildren == maxAgglomSize) break;
       }
 
       /*--- Identify the indirect neighbors ---*/
@@ -282,11 +407,13 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
       /*--- Now we do a sweep over all the indirect nodes that can be added ---*/
 
       for (auto CVPoint : Suitable_Indirect_Neighbors) {
+        // if we have reached the maximum, get out.
+        if (nChildren == maxAgglomSize) break;
         /*--- The new point can be agglomerated ---*/
-
         if ((!fine_grid->nodes->GetAgglomerate(CVPoint)) && (fine_grid->nodes->GetDomain(CVPoint))) {
-          /*--- We set the value of the parent ---*/
+          // cout << "indirect agglomerate " << CVPoint << " to internal seed point " << iPoint << endl;
 
+          /*--- We set the value of the parent ---*/
           fine_grid->nodes->SetParent_CV(CVPoint, Index_CoarseCV);
 
           /*--- We set the indirect agglomeration information ---*/
@@ -321,6 +448,7 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
 
   for (auto iPoint = 0ul; iPoint < fine_grid->GetnPoint(); iPoint++) {
     if ((!fine_grid->nodes->GetAgglomerate(iPoint)) && (fine_grid->nodes->GetDomain(iPoint))) {
+      cout << "!!! agglomerate isolated point " << iPoint << endl;
       fine_grid->nodes->SetParent_CV(iPoint, Index_CoarseCV);
       if (fine_grid->nodes->GetAgglomerate_Indirect(iPoint)) nodes->SetAgglomerate_Indirect(Index_CoarseCV, true);
       nodes->SetChildren_CV(Index_CoarseCV, 0, iPoint);
@@ -340,16 +468,63 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
   for (auto iCoarsePoint = 0ul; iCoarsePoint < nPointDomain; iCoarsePoint++) {
     if (nodes->GetnPoint(iCoarsePoint) == 1) {
       /*--- Find the neighbor of the isolated point. This neighbor is the right control volume ---*/
-
+      cout << "isolated point " << iCoarsePoint << endl;
       const auto iCoarsePoint_Complete = nodes->GetPoint(iCoarsePoint, 0);
 
-      /*--- Add the children to the connected control volume (and modify its parent indexing).
-       Identify the child CV from the finest grid and add it to the correct control volume.
-       Set the parent CV of iFinePoint. Instead of using the original one
-       (iCoarsePoint), use the new one (iCoarsePoint_Complete) ---*/
+      /*--- Check if merging would exceed the maximum agglomeration size ---*/
+      auto nChildren_Target = nodes->GetnChildren_CV(iCoarsePoint_Complete);
+      auto nChildren_Isolated = nodes->GetnChildren_CV(iCoarsePoint);
+      auto nChildren_Total = nChildren_Target + nChildren_Isolated;
+
+      /*--- If the total would exceed maxAgglomSize, try to redistribute children to neighbors ---*/
+      if (nChildren_Total > maxAgglomSize) {
+        cout << "   Merging isolated point " << iCoarsePoint << " to point " << iCoarsePoint_Complete
+             << " would exceed limit (" << nChildren_Total << " > " << maxAgglomSize << ")" << endl;
+
+        /*--- Find neighbors of the target coarse point that have room ---*/
+        unsigned short nChildrenToRedistribute = nChildren_Total - maxAgglomSize;
+
+        for (auto jCoarsePoint : nodes->GetPoints(iCoarsePoint_Complete)) {
+          if (nChildrenToRedistribute == 0) break;
+
+          auto nChildren_Neighbor = nodes->GetnChildren_CV(jCoarsePoint);
+          if (nChildren_Neighbor < maxAgglomSize) {
+            unsigned short nCanTransfer =
+                min(nChildrenToRedistribute, static_cast<unsigned short>(maxAgglomSize - nChildren_Neighbor));
+
+            cout << "   Redistributing " << nCanTransfer << " children from point " << iCoarsePoint_Complete
+                 << " to neighbor " << jCoarsePoint << endl;
+
+            /*--- Transfer children from target to neighbor ---*/
+            for (unsigned short iTransfer = 0; iTransfer < nCanTransfer; iTransfer++) {
+              /*--- Take from the end of the target's children list ---*/
+              auto nChildren_Current = nodes->GetnChildren_CV(iCoarsePoint_Complete);
+              if (nChildren_Current > 0) {
+                auto iFinePoint = nodes->GetChildren_CV(iCoarsePoint_Complete, nChildren_Current - 1);
+
+                /*--- Add to neighbor ---*/
+                auto nChildren_Neighbor_Current = nodes->GetnChildren_CV(jCoarsePoint);
+                nodes->SetChildren_CV(jCoarsePoint, nChildren_Neighbor_Current, iFinePoint);
+                nodes->SetnChildren_CV(jCoarsePoint, nChildren_Neighbor_Current + 1);
 
-      auto nChildren = nodes->GetnChildren_CV(iCoarsePoint_Complete);
+                /*--- Update parent ---*/
+                fine_grid->nodes->SetParent_CV(iFinePoint, jCoarsePoint);
 
+                /*--- Remove from target (by reducing count) ---*/
+                nodes->SetnChildren_CV(iCoarsePoint_Complete, nChildren_Current - 1);
+
+                nChildrenToRedistribute--;
+              }
+            }
+          }
+        }
+
+        /*--- Update the target's child count after redistribution ---*/
+        nChildren_Target = nodes->GetnChildren_CV(iCoarsePoint_Complete);
+      }
+
+      /*--- Add the isolated point's children to the target control volume ---*/
+      auto nChildren = nChildren_Target;
       for (auto iChildren = 0u; iChildren < nodes->GetnChildren_CV(iCoarsePoint); iChildren++) {
         const auto iFinePoint = nodes->GetChildren_CV(iCoarsePoint, iChildren);
         nodes->SetChildren_CV(iCoarsePoint_Complete, nChildren, iFinePoint);
@@ -358,9 +533,10 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
       }
 
       /*--- Update the number of children control volumes ---*/
-
       nodes->SetnChildren_CV(iCoarsePoint_Complete, nChildren);
       nodes->SetnChildren_CV(iCoarsePoint, 0);
+
+      cout << "   Final: point " << iCoarsePoint_Complete << " has " << nChildren << " children" << endl;
     }
   }
 
@@ -369,15 +545,28 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
   nodes->ResetPoints();
 
 #ifdef HAVE_MPI
+  /*--- Reset halo point parents before MPI agglomeration.
+   This is critical for multi-level multigrid: when creating level N from level N-1,
+   the fine grid (level N-1) already has Parent_CV set from when it was created from level N-2.
+   Those parent indices point to level N, but when creating level N+1, they would be
+   incorrectly interpreted as level N+1 indices. Resetting ensures clean agglomeration. ---*/
+
+  for (auto iPoint = fine_grid->GetnPointDomain(); iPoint < fine_grid->GetnPoint(); iPoint++) {
+    fine_grid->nodes->SetParent_CV(iPoint, ULONG_MAX);
+  }
+
   /*--- Dealing with MPI parallelization, the objective is that the received nodes must be agglomerated
    in the same way as the donor (send) nodes. Send the node agglomeration information of the donor
    (parent and children). The agglomerated halos of this rank are set according to the rank where
    they are domain points. ---*/
 
   for (auto iMarker = 0u; iMarker < config->GetnMarker_All(); iMarker++) {
+    cout << " marker name = " << config->GetMarker_All_TagBound(iMarker) << endl;
+    cout << " marker type = " << config->GetMarker_All_KindBC(iMarker) << endl;
+    cout << " send/recv = " << config->GetMarker_All_SendRecv(iMarker) << endl;
     if ((config->GetMarker_All_KindBC(iMarker) == SEND_RECEIVE) && (config->GetMarker_All_SendRecv(iMarker) > 0)) {
-      const auto MarkerS = iMarker;
-      const auto MarkerR = iMarker + 1;
+      const auto MarkerS = iMarker; // sending marker
+      const auto MarkerR = iMarker + 1; // receiving marker
 
       const auto send_to = config->GetMarker_All_SendRecv(MarkerS) - 1;
       const auto receive_from = abs(config->GetMarker_All_SendRecv(MarkerR)) - 1;
@@ -424,38 +613,260 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
       vector<unsigned long> Parent_Local(nVertexR);
       vector<unsigned long> Children_Local(nVertexR);
 
+      /*--- First pass: Determine which parents will actually be used (have non-skipped children).
+       This prevents creating orphaned halo CVs that have coordinates (0,0,0). ---*/
+      vector<bool> parent_used(Aux_Parent.size(), false);
+      vector<unsigned long> parent_local_index(Aux_Parent.size(), ULONG_MAX);
+
       for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
-        /*--- We use the same sorting as in the donor domain, i.e. the local parents
-         are numbered according to their order in the remote rank. ---*/
+        const auto iPoint_Fine = fine_grid->vertex[MarkerR][iVertex]->GetNode();
+        auto existing_parent = fine_grid->nodes->GetParent_CV(iPoint_Fine);
+
+        /*--- Skip if already agglomerated (first-wins policy) ---*/
+        if (existing_parent != ULONG_MAX) continue;
+
+        /*--- Find which parent this vertex maps to ---*/
+        for (auto jVertex = 0ul; jVertex < Aux_Parent.size(); jVertex++) {
+          if (Parent_Remote[iVertex] == Aux_Parent[jVertex]) {
+            parent_used[jVertex] = true;
+            break;
+          }
+        }
+      }
+
+      /*--- Assign local indices only to used parents ---*/
+      unsigned long nUsedParents = 0;
+      for (auto jVertex = 0ul; jVertex < Aux_Parent.size(); jVertex++) {
+        if (parent_used[jVertex]) {
+          parent_local_index[jVertex] = Index_CoarseCV + nUsedParents;
+          nUsedParents++;
+        }
+      }
 
+      /*--- Now map each received vertex to its local parent ---*/
+      for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
+        Parent_Local[iVertex] = ULONG_MAX;
         for (auto jVertex = 0ul; jVertex < Aux_Parent.size(); jVertex++) {
           if (Parent_Remote[iVertex] == Aux_Parent[jVertex]) {
-            Parent_Local[iVertex] = jVertex + Index_CoarseCV;
+            Parent_Local[iVertex] = parent_local_index[jVertex];
             break;
           }
         }
+
+        /*--- Validate that parent mapping was found (only matters if not skipped later) ---*/
+        if (Parent_Local[iVertex] == ULONG_MAX) {
+          SU2_MPI::Error(string("MPI agglomeration failed to map parent index ") +
+                         to_string(Parent_Remote[iVertex]) +
+                         string(" for vertex ") + to_string(iVertex),
+                         CURRENT_FUNCTION);
+        }
+
         Children_Local[iVertex] = fine_grid->vertex[MarkerR][iVertex]->GetNode();
       }
 
-      Index_CoarseCV += Aux_Parent.size();
+      /*--- Debug: Track state before updating Index_CoarseCV ---*/
+      auto Index_CoarseCV_Before = Index_CoarseCV;
+
+      /*--- Only increment by the number of parents that will actually be used ---*/
+      Index_CoarseCV += nUsedParents;
 
-      vector<unsigned short> nChildren_MPI(Index_CoarseCV, 0);
+      /*--- Debug counters ---*/
+      unsigned long nConflicts = 0, nSkipped = 0, nOutOfBounds = 0, nSuccess = 0;
 
       /*--- Create the final structure ---*/
       for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
         const auto iPoint_Coarse = Parent_Local[iVertex];
         const auto iPoint_Fine = Children_Local[iVertex];
 
-        /*--- Be careful, it is possible that a node changes the agglomeration configuration,
-         the priority is always when receiving the information. ---*/
+        /*--- Debug: Check for out-of-bounds access ---*/
+        if (iPoint_Coarse >= Index_CoarseCV) {
+          cout << "ERROR [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+               << "]: Out-of-bounds coarse CV index " << iPoint_Coarse
+               << " >= " << Index_CoarseCV
+               << " (vertex " << iVertex << ", fine point " << iPoint_Fine << ")" << endl;
+          nOutOfBounds++;
+          continue;
+        }
+
+        /*--- Solution 1: Skip if this halo point was already agglomerated ---*/
+        auto existing_parent = fine_grid->nodes->GetParent_CV(iPoint_Fine);
+        if (existing_parent != ULONG_MAX) {
+          if (existing_parent != iPoint_Coarse) {
+            /*--- Conflict detected: different parent from different interface ---*/
+            nConflicts++;
+
+            /*--- Only print detailed info for first few conflicts or if suspicious ---*/
+            if (nConflicts <= 5 || existing_parent < nPointDomain) {
+              cout << "INFO [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+                   << "]: Halo point " << iPoint_Fine
+                   << " already agglomerated to parent " << existing_parent
+                   << (existing_parent < nPointDomain ? " (DOMAIN CV!)" : " (halo CV)")
+                   << ", skipping reassignment to " << iPoint_Coarse
+                   << " (from rank " << receive_from << ")" << endl;
+            }
+          } else {
+            /*--- Same parent from different interface (duplicate) - just skip silently ---*/
+            nSkipped++;
+          }
+          continue;  // First-wins: keep existing assignment
+        }
+
+        /*--- First assignment for this halo point - proceed with agglomeration ---*/
+
+        /*--- Critical fix: Append to existing children, don't overwrite ---*/
+        auto existing_children_count = nodes->GetnChildren_CV(iPoint_Coarse);
+
         fine_grid->nodes->SetParent_CV(iPoint_Fine, iPoint_Coarse);
-        nodes->SetChildren_CV(iPoint_Coarse, nChildren_MPI[iPoint_Coarse], iPoint_Fine);
-        nChildren_MPI[iPoint_Coarse]++;
-        nodes->SetnChildren_CV(iPoint_Coarse, nChildren_MPI[iPoint_Coarse]);
+        nodes->SetChildren_CV(iPoint_Coarse, existing_children_count, iPoint_Fine);
+        nodes->SetnChildren_CV(iPoint_Coarse, existing_children_count + 1);
         nodes->SetDomain(iPoint_Coarse, false);
+        nSuccess++;
       }
+
+      /*--- Debug: Report statistics for this marker pair ---*/
+      cout << "MPI Agglomeration [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+           << " (rank " << send_to << " <-> " << receive_from << ")]: "
+           << nSuccess << " assigned, " << nSkipped << " duplicates, "
+           << nConflicts << " conflicts";
+      if (nOutOfBounds > 0) {
+        cout << ", " << nOutOfBounds << " OUT-OF-BOUNDS (CRITICAL!)";
+      }
+      cout << endl;
+
+      if (nConflicts > 5) {
+        cout << "  Note: Only first 5 conflicts shown in detail, total conflicts = " << nConflicts << endl;
+      }
+
+      /*--- Debug: Validate buffer size assumption ---*/
+      if (nVertexS != nVertexR) {
+        cout << "WARNING [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+             << "]: Asymmetric interface - nVertexS=" << nVertexS
+             << " != nVertexR=" << nVertexR << endl;
+      }
+    }
+  }
+
+  /*--- Post-process: Check for orphaned coarse CVs (should be none with new logic) ---*/
+
+  if (size > SINGLE_NODE) {
+    unsigned long nOrphaned = 0;
+
+    /*--- Count orphaned CVs for reporting ---*/
+    for (auto iCoarse = nPointDomain; iCoarse < Index_CoarseCV; iCoarse++) {
+      if (nodes->GetnChildren_CV(iCoarse) == 0) {
+        nOrphaned++;
+        /*--- This shouldn't happen with the new parent prefiltering logic ---*/
+        cout << "WARNING [Rank " << rank << "]: Orphaned halo CV " << iCoarse
+             << " detected (should not occur with current logic)" << endl;
+      }
+    }
+
+    if (nOrphaned > 0) {
+      cout << "WARNING [Rank " << rank << "]: " << nOrphaned
+           << " orphaned halo coarse CVs found - this indicates a logic error!" << endl;
     }
   }
+
+  /*--- Debug validation: Verify halo CV coordinates match domain CVs on remote ranks ---*/
+  /*--- Note: This validation is deferred until after SetVertex() is called, as vertices ---*/
+  /*--- are not yet initialized at the end of the constructor. The validation is performed ---*/
+  /*--- by calling ValidateHaloCoordinates() after SetVertex() in the driver. ---*/
+
+  /*--- For now, we skip this validation in the constructor to avoid segfaults. ---*/
+  /*--- TODO: Move this to a separate validation function called after SetVertex(). ---*/
+
+#if 0  // Disabled - causes segfault as vertex array not yet initialized
+  if (size > SINGLE_NODE && config->GetMG_DebugHaloCoordinates()) {
+
+    if (rank == MASTER_NODE) {
+      cout << "\n--- MG Halo Coordinate Validation (Level " << iMesh << ") ---" << endl;
+    }
+
+    /*--- For each SEND_RECEIVE marker pair, exchange coordinates and validate ---*/
+    for (auto iMarker = 0u; iMarker < config->GetnMarker_All(); iMarker++) {
+      if ((config->GetMarker_All_KindBC(iMarker) == SEND_RECEIVE) && (config->GetMarker_All_SendRecv(iMarker) > 0)) {
+        const auto MarkerS = iMarker;
+        const auto MarkerR = iMarker + 1;
+
+        const auto send_to = config->GetMarker_All_SendRecv(MarkerS) - 1;
+        const auto receive_from = abs(config->GetMarker_All_SendRecv(MarkerR)) - 1;
+
+        const auto nVertexS = nVertex[MarkerS];
+        const auto nVertexR = nVertex[MarkerR];
+
+        /*--- Allocate buffers for coordinate exchange ---*/
+        vector<su2double> Buffer_Send_Coord(nVertexS * nDim);
+        vector<su2double> Buffer_Receive_Coord(nVertexR * nDim);
+
+        /*--- Pack SEND coordinates (domain CVs being sent) ---*/
+        for (auto iVertex = 0ul; iVertex < nVertexS; iVertex++) {
+          const auto iPoint = vertex[MarkerS][iVertex]->GetNode();
+          const auto* Coord = nodes->GetCoord(iPoint);
+          for (auto iDim = 0u; iDim < nDim; iDim++) {
+            Buffer_Send_Coord[iVertex * nDim + iDim] = Coord[iDim];
+          }
+        }
+
+        /*--- Exchange coordinates ---*/
+        SU2_MPI::Sendrecv(Buffer_Send_Coord.data(), nVertexS * nDim, MPI_DOUBLE, send_to, 0,
+                          Buffer_Receive_Coord.data(), nVertexR * nDim, MPI_DOUBLE, receive_from, 0,
+                          SU2_MPI::GetComm(), MPI_STATUS_IGNORE);
+
+        /*--- Validate RECEIVE coordinates against local halo CVs ---*/
+        unsigned long nMismatch = 0;
+        su2double maxError = 0.0;
+        su2double tolerance = 1e-10;
+
+        for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
+          const auto iPoint = vertex[MarkerR][iVertex]->GetNode();
+          const auto* Coord_Local = nodes->GetCoord(iPoint);
+
+          su2double error = 0.0;
+          for (auto iDim = 0u; iDim < nDim; iDim++) {
+            su2double coord_remote = Buffer_Receive_Coord[iVertex * nDim + iDim];
+            su2double diff = fabs(Coord_Local[iDim] - coord_remote);
+            error += diff * diff;
+          }
+          error = sqrt(error);
+
+          if (error > tolerance) {
+            nMismatch++;
+            maxError = max(maxError, error);
+
+            if (nMismatch <= 5) {  // Only print first 5 mismatches
+              cout << "COORD MISMATCH [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+                   << ", Vertex " << iVertex << ", Point " << iPoint << "]: ";
+              cout << "Local=(";
+              for (auto iDim = 0u; iDim < nDim; iDim++) {
+                cout << Coord_Local[iDim];
+                if (iDim < nDim - 1) cout << ", ";
+              }
+              cout << "), Remote=(";
+              for (auto iDim = 0u; iDim < nDim; iDim++) {
+                cout << Buffer_Receive_Coord[iVertex * nDim + iDim];
+                if (iDim < nDim - 1) cout << ", ";
+              }
+              cout << "), Error=" << error << endl;
+            }
+          }
+        }
+
+        if (nMismatch > 0) {
+          cout << "WARNING [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+               << "]: " << nMismatch << " coordinate mismatches detected (max error: "
+               << maxError << ")" << endl;
+        } else if (nVertexR > 0) {
+          cout << "INFO [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+               << "]: All " << nVertexR << " halo CV coordinates match (tol=" << tolerance << ")" << endl;
+        }
+      }
+    }
+
+    if (rank == MASTER_NODE) {
+      cout << "--- End MG Halo Coordinate Validation ---\n" << endl;
+    }
+  }
+#endif  // Disabled validation code
 #endif  // HAVE_MPI
 
   /*--- Update the number of points after the MPI agglomeration ---*/
@@ -463,25 +874,27 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
   nPoint = Index_CoarseCV;
 
   /*--- Console output with the summary of the agglomeration ---*/
-
-  unsigned long nPointFine = fine_grid->GetnPoint();
+  // nijso: do not include halo points in the count
+  unsigned long nPointFine = fine_grid->GetnPointDomain();
   unsigned long Global_nPointCoarse, Global_nPointFine;
 
-  SU2_MPI::Allreduce(&nPoint, &Global_nPointCoarse, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
+  SU2_MPI::Allreduce(&nPointDomain, &Global_nPointCoarse, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
   SU2_MPI::Allreduce(&nPointFine, &Global_nPointFine, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
 
   SetGlobal_nPointDomain(Global_nPointCoarse);
 
   if (iMesh != MESH_0) {
-    const su2double factor = 1.5;
-    const su2double Coeff = pow(su2double(Global_nPointFine) / Global_nPointCoarse, 1.0 / nDim);
-    const su2double CFL = factor * config->GetCFL(iMesh - 1) / Coeff;
+    // const su2double factor = 1.5; //nijso: too high
+    const su2double factor = 1.1;
+    // const su2double Coeff = pow(su2double(Global_nPointFine) / Global_nPointCoarse, 1.0 / nDim);
+    const su2double CFL = factor * config->GetCFL(iMesh - 1);  // / Coeff;
     config->SetCFL(iMesh, CFL);
   }
 
   const su2double ratio = su2double(Global_nPointFine) / su2double(Global_nPointCoarse);
-
-  if (((nDim == 2) && (ratio < 2.5)) || ((nDim == 3) && (ratio < 2.5))) {
+  cout << "********** ratio = " << ratio << endl;
+  // lower value leads to more levels being accepted.
+  if (((nDim == 2) && (ratio < 1.5)) || ((nDim == 3) && (ratio < 1.5))) {
     config->SetMGLevels(iMesh - 1);
   } else if (rank == MASTER_NODE) {
     PrintingToolbox::CTablePrinter MGTable(&std::cout);
@@ -503,11 +916,86 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
     }
   }
 
+  /*--- Output Euler wall agglomeration statistics ---*/
+  if (rank == MASTER_NODE) {
+    /*--- Gather global statistics for Euler walls ---*/
+    bool has_euler_walls = false;
+    for (unsigned short iMarker = 0; iMarker < fine_grid->GetnMarker(); iMarker++) {
+      if (config->GetMarker_All_KindBC(iMarker) == EULER_WALL) {
+        has_euler_walls = true;
+        break;
+      }
+    }
+
+    if (has_euler_walls) {
+      cout << endl;
+      cout << "Euler Wall Agglomeration Statistics (45° curvature threshold):" << endl;
+      cout << "----------------------------------------------------------------" << endl;
+
+      for (unsigned short iMarker = 0; iMarker < fine_grid->GetnMarker(); iMarker++) {
+        if (config->GetMarker_All_KindBC(iMarker) == EULER_WALL) {
+          string marker_name = config->GetMarker_All_TagBound(iMarker);
+          unsigned long agglomerated = euler_wall_agglomerated[iMarker];
+          unsigned long rejected = euler_wall_rejected_curvature[iMarker];
+          unsigned long total = agglomerated + rejected;
+
+          if (total > 0) {
+            su2double accept_rate = 100.0 * su2double(agglomerated) / su2double(total);
+            cout << "  Marker: " << marker_name << endl;
+            cout << "    Seeds agglomerated:       " << agglomerated << " (" << std::setprecision(1) << std::fixed
+                 << accept_rate << "%)" << endl;
+            cout << "    Seeds rejected (>45° curv): " << rejected << " (" << std::setprecision(1) << std::fixed
+                 << (100.0 - accept_rate) << "%)" << endl;
+          }
+        }
+      }
+      cout << "----------------------------------------------------------------" << endl;
+    }
+  }
+
   edgeColorGroupSize = config->GetEdgeColoringGroupSize();
 }
 
-bool CMultiGridGeometry::SetBoundAgglomeration(unsigned long CVPoint, short marker_seed, const CGeometry* fine_grid,
-                                               const CConfig* config) const {
+bool CMultiGridGeometry::GeometricalCheck(unsigned long iPoint, const CGeometry* fine_grid,
+                                          const CConfig* config) const {
+  su2double max_dimension = 1.2;
+
+  /*--- Evaluate the total size of the element ---*/
+
+  bool Volume = true;
+  su2double ratio = pow(fine_grid->nodes->GetVolume(iPoint), 1.0 / su2double(nDim)) * max_dimension;
+  su2double limit = pow(config->GetDomainVolume(), 1.0 / su2double(nDim));
+  if (ratio > limit) {
+    Volume = false;
+    cout << "Volume limit reached!" << endl;
+  }
+  /*--- Evaluate the stretching of the element ---*/
+
+  bool Stretching = true;
+
+  /* unsigned short iNode, iDim;
+   unsigned long jPoint;
+   su2double *Coord_i = fine_grid->nodes->GetCoord(iPoint);
+   su2double max_dist = 0.0 ; su2double min_dist = 1E20;
+   for (iNode = 0; iNode < fine_grid->nodes->GetnPoint(iPoint); iNode ++) {
+   jPoint = fine_grid->nodes->GetPoint(iPoint, iNode);
+   su2double *Coord_j = fine_grid->nodes->GetCoord(jPoint);
+   su2double distance = 0.0;
+   for (iDim = 0; iDim < nDim; iDim++)
+   distance += (Coord_j[iDim]-Coord_i[iDim])*(Coord_j[iDim]-Coord_i[iDim]);
+   distance = sqrt(distance);
+   max_dist = max(distance, max_dist);
+   min_dist = min(distance, min_dist);
+   }
+   if ( max_dist/min_dist > 100.0 ) Stretching = false;*/
+
+  return (Stretching && Volume);
+}
+
+void CMultiGridGeometry::ComputeSurfStraightness(CConfig* config) { CGeometry::ComputeSurfStraightness(config, true); }
+
+bool CMultiGridGeometry::SetBoundAgglomeration(unsigned long CVPoint, vector<short> marker_seed,
+                                               const CGeometry* fine_grid, const CConfig* config) const {
   bool agglomerate_CV = false;
 
   /*--- Basic condition, the point has not been previously agglomerated, it belongs to the domain,
@@ -517,12 +1005,13 @@ bool CMultiGridGeometry::SetBoundAgglomeration(unsigned long CVPoint, short mark
       (GeometricalCheck(CVPoint, fine_grid, config))) {
     /*--- If the point belongs to a boundary, its type must be compatible with the seed marker. ---*/
 
+    int counter = 0;
+    unsigned short copy_marker[3] = {};
+
     if (fine_grid->nodes->GetBoundary(CVPoint)) {
       /*--- Identify the markers of the vertex that we want to agglomerate ---*/
 
       // count number of markers on the agglomeration candidate
-      int counter = 0;
-      unsigned short copy_marker[3] = {};
       for (auto jMarker = 0u; jMarker < fine_grid->GetnMarker() && counter < 3; jMarker++) {
         if (fine_grid->nodes->GetVertex(CVPoint, jMarker) != -1) {
           copy_marker[counter] = jMarker;
@@ -530,94 +1019,80 @@ bool CMultiGridGeometry::SetBoundAgglomeration(unsigned long CVPoint, short mark
         }
       }
 
-      /*--- The basic condition is that the aglomerated vertex must have the same physical marker,
+      /*--- The basic condition is that the agglomerated vertex must have the same physical marker,
        but eventually a send-receive condition ---*/
 
-      /*--- Only one marker in the vertex that is going to be aglomerated ---*/
+      /*--- Only one marker in the vertex that is going to be agglomerated ---*/
 
+      /*--- Valley -> Valley: only if of the same type---*/
       if (counter == 1) {
         /*--- We agglomerate if there is only one marker and it is the same marker as the seed marker ---*/
-        // note that this should be the same marker id, not just the same marker type
-        if (copy_marker[0] == marker_seed) agglomerate_CV = true;
-
-        /*--- If there is only one marker, but the marker is the SEND_RECEIVE ---*/
-
-        if (config->GetMarker_All_KindBC(copy_marker[0]) == SEND_RECEIVE) {
-          agglomerate_CV = true;
+        // So this is the case when in 2D we are on an edge, and in 3D we are in the interior of a surface.
+        // note that this should be the same marker id, not just the same marker type.
+        // also note that the seed point can have 2 markers, one of them may be a send-receive.
+        if ((marker_seed.size() == 1) && (copy_marker[0] == marker_seed[0])) agglomerate_CV = true;
+        if ((marker_seed.size() == 2) && (config->GetMarker_All_KindBC(marker_seed[0]) == SEND_RECEIVE)) {
+          if (copy_marker[0] == marker_seed[1]) {
+            agglomerate_CV = true;
+          }
         }
-
-        if ((config->GetMarker_All_KindBC(marker_seed) == SYMMETRY_PLANE) ||
-            (config->GetMarker_All_KindBC(marker_seed) == EULER_WALL)) {
-          if (config->GetMarker_All_KindBC(copy_marker[0]) == SEND_RECEIVE) {
-            agglomerate_CV = false;
+        if ((marker_seed.size() == 2) && (config->GetMarker_All_KindBC(marker_seed[1]) == SEND_RECEIVE)) {
+          if (copy_marker[0] == marker_seed[0]) {
+            agglomerate_CV = true;
           }
         }
+
+        /*--- Note: If there is only one marker, but the marker is the SEND_RECEIVE, then the point is actually an
+              interior point and we do not agglomerate.  ---*/
+
+        // if ((config->GetMarker_All_KindBC(marker_seed[0]) == SYMMETRY_PLANE) ||
+        //     (config->GetMarker_All_KindBC(marker_seed[0]) == EULER_WALL)) {
+        //   if (config->GetMarker_All_KindBC(copy_marker[0]) == SEND_RECEIVE) {
+        //     agglomerate_CV = false;
+        //   }
+        // }
       }
 
       /*--- If there are two markers in the vertex that is going to be aglomerated ---*/
 
       if (counter == 2) {
-        /*--- First we verify that the seed is a physical boundary ---*/
-
-        if (config->GetMarker_All_KindBC(marker_seed) != SEND_RECEIVE) {
-          /*--- Then we check that one of the markers is equal to the seed marker, and the other is send/receive ---*/
+        /*--- Both markers have to be the same. ---*/
 
-          if (((copy_marker[0] == marker_seed) && (config->GetMarker_All_KindBC(copy_marker[1]) == SEND_RECEIVE)) ||
-              ((config->GetMarker_All_KindBC(copy_marker[0]) == SEND_RECEIVE) && (copy_marker[1] == marker_seed))) {
+        if (marker_seed.size() == 2) {
+          if (((copy_marker[0] == marker_seed[0]) && (copy_marker[1] == marker_seed[1])) ||
+              ((copy_marker[0] == marker_seed[1]) && (copy_marker[1] == marker_seed[0]))) {
             agglomerate_CV = true;
           }
         }
       }
     }
-    /*--- If the element belongs to the domain, it is always agglomerated. ---*/
+    /*--- If the element belongs to the domain, it is never agglomerated with a boundary node. ---*/
     else {
-      agglomerate_CV = true;
+      agglomerate_CV = false;
 
       // actually, for symmetry (and possibly other cells) we only agglomerate cells that are on the marker
       // at this point, the seed was on the boundary and the CV was not. so we check if the seed is a symmetry
-      if ((config->GetMarker_All_KindBC(marker_seed) == SYMMETRY_PLANE) ||
-          (config->GetMarker_All_KindBC(marker_seed) == EULER_WALL)) {
-        agglomerate_CV = false;
-      }
+      // if ((config->GetMarker_All_KindBC(marker_seed[0]) == SYMMETRY_PLANE) ||
+      //     (config->GetMarker_All_KindBC(marker_seed[0]) == EULER_WALL)) {
+      //   agglomerate_CV = false;
+      // }
     }
+
+    // /*--- Check for curved EULER_WALL ---*/
+    // if (agglomerate_CV && fine_grid->nodes->GetBoundary(CVPoint)) {
+    //   for (int i = 0; i < counter; i++) {
+    //     if (config->GetMarker_All_KindBC(copy_marker[i]) == EULER_WALL && !boundIsStraight[copy_marker[i]]) {
+    //       agglomerate_CV = false;
+    //       break;
+    //     }
+    //   }
+    // }
   }
 
   return agglomerate_CV;
 }
 
-bool CMultiGridGeometry::GeometricalCheck(unsigned long iPoint, const CGeometry* fine_grid,
-                                          const CConfig* config) const {
-  su2double max_dimension = 1.2;
-
-  /*--- Evaluate the total size of the element ---*/
-
-  bool Volume = true;
-  su2double ratio = pow(fine_grid->nodes->GetVolume(iPoint), 1.0 / su2double(nDim)) * max_dimension;
-  su2double limit = pow(config->GetDomainVolume(), 1.0 / su2double(nDim));
-  if (ratio > limit) Volume = false;
-
-  /*--- Evaluate the stretching of the element ---*/
-
-  bool Stretching = true;
-
-  /* unsigned short iNode, iDim;
-   unsigned long jPoint;
-   su2double *Coord_i = fine_grid->nodes->GetCoord(iPoint);
-   su2double max_dist = 0.0 ; su2double min_dist = 1E20;
-   for (iNode = 0; iNode < fine_grid->nodes->GetnPoint(iPoint); iNode ++) {
-   jPoint = fine_grid->nodes->GetPoint(iPoint, iNode);
-   su2double *Coord_j = fine_grid->nodes->GetCoord(jPoint);
-   su2double distance = 0.0;
-   for (iDim = 0; iDim < nDim; iDim++)
-   distance += (Coord_j[iDim]-Coord_i[iDim])*(Coord_j[iDim]-Coord_i[iDim]);
-   distance = sqrt(distance);
-   max_dist = max(distance, max_dist);
-   min_dist = min(distance, min_dist);
-   }
-   if ( max_dist/min_dist > 100.0 ) Stretching = false;*/
-
-  return (Stretching && Volume);
-}
+/*--- ---*/
 
 void CMultiGridGeometry::SetSuitableNeighbors(vector<unsigned long>& Suitable_Indirect_Neighbors, unsigned long iPoint,
                                               unsigned long Index_CoarseCV, const CGeometry* fine_grid) const {
@@ -637,8 +1112,8 @@ void CMultiGridGeometry::SetSuitableNeighbors(vector<unsigned long>& Suitable_In
 
       auto end = First_Neighbor_Points.end();
       if (find(First_Neighbor_Points.begin(), end, kPoint) == end) {
-        Second_Neighbor_Points.push_back(kPoint);
-        Second_Origin_Points.push_back(jPoint);
+        Second_Neighbor_Points.push_back(kPoint);  // neighbor of a neighbor, not connected to original ipoint
+        Second_Origin_Points.push_back(jPoint);    // the neighbor that is connected to ipoint
       }
     }
   }
@@ -673,48 +1148,364 @@ void CMultiGridGeometry::SetSuitableNeighbors(vector<unsigned long>& Suitable_In
 
   /// TODO: This repeats the process above but I doubt it catches any more points.
 
-  vector<unsigned long> Third_Neighbor_Points, Third_Origin_Points;
+  // vector<unsigned long> Third_Neighbor_Points, Third_Origin_Points;
 
-  for (auto kPoint : Suitable_Second_Neighbors) {
-    for (auto lPoint : fine_grid->nodes->GetPoints(kPoint)) {
-      /*--- Check that the third neighbor does not belong to the first neighbors or the seed ---*/
+  // for (auto kPoint : Suitable_Second_Neighbors) {
+  //   for (auto lPoint : fine_grid->nodes->GetPoints(kPoint)) {
+  //     /*--- Check that the third neighbor does not belong to the first neighbors or the seed ---*/
 
-      auto end1 = First_Neighbor_Points.end();
-      if (find(First_Neighbor_Points.begin(), end1, lPoint) != end1) continue;
+  //     auto end1 = First_Neighbor_Points.end();
+  //     if (find(First_Neighbor_Points.begin(), end1, lPoint) != end1) continue;
 
-      /*--- Check that the third neighbor does not belong to the second neighbors ---*/
+  //     /*--- Check that the third neighbor does not belong to the second neighbors ---*/
 
-      auto end2 = Suitable_Second_Neighbors.end();
-      if (find(Suitable_Second_Neighbors.begin(), end2, lPoint) != end2) continue;
+  //     auto end2 = Suitable_Second_Neighbors.end();
+  //     if (find(Suitable_Second_Neighbors.begin(), end2, lPoint) != end2) continue;
 
-      Third_Neighbor_Points.push_back(lPoint);
-      Third_Origin_Points.push_back(kPoint);
-    }
-  }
+  //     Third_Neighbor_Points.push_back(lPoint);
+  //     Third_Origin_Points.push_back(kPoint);
+  //   }
+  // }
 
   /*--- Identify those third neighbors that are repeated (candidate to be added). ---*/
 
-  for (auto iNeighbor = 0ul; iNeighbor < Third_Neighbor_Points.size(); iNeighbor++) {
-    for (auto jNeighbor = iNeighbor + 1; jNeighbor < Third_Neighbor_Points.size(); jNeighbor++) {
-      /*--- Repeated third neighbor with different origin ---*/
+  // for (auto iNeighbor = 0ul; iNeighbor < Third_Neighbor_Points.size(); iNeighbor++) {
+  //   for (auto jNeighbor = iNeighbor + 1; jNeighbor < Third_Neighbor_Points.size(); jNeighbor++) {
+  //     /*--- Repeated third neighbor with different origin ---*/
+
+  //     if ((Third_Neighbor_Points[iNeighbor] == Third_Neighbor_Points[jNeighbor]) &&
+  //         (Third_Origin_Points[iNeighbor] != Third_Origin_Points[jNeighbor])) {
+  //       Suitable_Indirect_Neighbors.push_back(Third_Neighbor_Points[iNeighbor]);
+  //     }
+  //   }
+  // }
+
+  /*--- Remove duplicates from the final list of Suitable Indirect Neighbors. ---*/
+
+  // sort(Suitable_Indirect_Neighbors.begin(), Suitable_Indirect_Neighbors.end());
+  // auto it2 = unique(Suitable_Indirect_Neighbors.begin(), Suitable_Indirect_Neighbors.end());
+  // Suitable_Indirect_Neighbors.resize(it2 - Suitable_Indirect_Neighbors.begin());
+}
 
-      if ((Third_Neighbor_Points[iNeighbor] == Third_Neighbor_Points[jNeighbor]) &&
-          (Third_Origin_Points[iNeighbor] != Third_Origin_Points[jNeighbor])) {
-        Suitable_Indirect_Neighbors.push_back(Third_Neighbor_Points[iNeighbor]);
+void CMultiGridGeometry::AgglomerateImplicitLines(unsigned long& Index_CoarseCV, const CGeometry* fine_grid,
+                                                  const CConfig* config, CMultiGridQueue& MGQueue_InnerCV) {
+  /*--- Parameters ---*/
+  const su2double angle_threshold_deg = 20.0; /* stop line if direction deviates more than this */
+  const su2double pi = acos(-1.0);
+  const su2double cos_threshold = cos(angle_threshold_deg * pi / 180.0);
+  const unsigned long max_line_length = 20; /* maximum number of nodes on implicit line (including wall) */
+
+  const unsigned long nPoint = fine_grid->GetnPoint();
+  vector<char> reserved(nPoint, 0);
+  const bool debug = config->GetMG_Implicit_Debug();
+  if (debug)
+    std::cout << "[MG_IMPLICIT] Starting AgglomerateImplicitLines: nPoint=" << nPoint
+              << " angle_thresh=" << angle_threshold_deg << " max_line_length=" << max_line_length << std::endl;
+
+  /*--- Collect implicit lines starting at wall vertices ---*/
+  vector<vector<unsigned long>> lines;  // each line: [W, n1, n2, ...]
+  vector<unsigned long> wall_nodes;     // wall node for each line (index into lines)
+  for (auto iMarker = 0u; iMarker < fine_grid->GetnMarker(); iMarker++) {
+    /* skip non-wall markers (keep same condition as FindNormal_Neighbor) */
+    if (config->GetMarker_All_KindBC(iMarker) == SEND_RECEIVE ||
+        config->GetMarker_All_KindBC(iMarker) == INTERNAL_BOUNDARY ||
+        config->GetMarker_All_KindBC(iMarker) == NEARFIELD_BOUNDARY)
+      continue;
+    cout << "We are on marker with name " << config->GetMarker_CfgFile_TagBound(iMarker) << " and kind "
+         << config->GetMarker_All_KindBC(iMarker) << endl;
+
+    for (auto iVertex = 0ul; iVertex < fine_grid->GetnVertex(iMarker); iVertex++) {
+      const auto iPoint = fine_grid->vertex[iMarker][iVertex]->GetNode();
+      cout << "node number " << iPoint << endl;
+      ///* skip already agglomerated or non-domain points */
+      // if (fine_grid->nodes->GetAgglomerate(iPoint) || !fine_grid->nodes->GetDomain(iPoint)) continue;
+
+      /* get normal at the vertex; if not available skip */
+      const long ChildVertex = fine_grid->nodes->GetVertex(iPoint, iMarker);
+      if (ChildVertex == -1) continue;
+      su2double Normal[MAXNDIM] = {0.0};
+      fine_grid->vertex[iMarker][ChildVertex]->GetNormal(Normal);
+
+      /* start building the implicit line */
+      vector<unsigned long> L;
+      L.push_back(iPoint);
+
+      /* prev direction: use vertex normal (assumed inward) */
+      vector<su2double> prev_dir(nDim, 0.0);
+      for (unsigned i = 0; i < nDim; ++i) prev_dir[i] = Normal[i];
+      su2double norm_prev = GeometryToolbox::Norm(nDim, prev_dir.data());
+      if (norm_prev == 0.0) continue;
+      for (unsigned i = 0; i < nDim; ++i) prev_dir[i] /= norm_prev;
+
+      unsigned long current = iPoint;
+      while (L.size() < max_line_length) {
+        /* find best interior neighbor aligned with prev_dir */
+        su2double best_dot = -2.0;
+        unsigned long best_neighbor = ULONG_MAX;
+        for (auto jPoint : fine_grid->nodes->GetPoints(current)) {
+          if (jPoint == current) continue;
+          if (fine_grid->nodes->GetAgglomerate(jPoint)) {
+            if (debug) {
+              const auto parent = fine_grid->nodes->GetParent_CV(jPoint);
+              unsigned short nchild = 0;
+              if (parent != ULONG_MAX) nchild = nodes->GetnChildren_CV(parent);
+              std::cout << "[MG_IMPLICIT] Neighbor " << jPoint << " already agglomerated; parent=" << parent
+                        << " nChildren=" << nchild << " children: ";
+              if (parent != ULONG_MAX) {
+                for (unsigned short ci = 0; ci < nchild; ++ci) {
+                  std::cout << nodes->GetChildren_CV(parent, ci) << " ";
+                }
+              }
+              std::cout << std::endl;
+
+              /*--- If parent has exactly two children, try to identify wall child and other child ---*/
+              if (parent != ULONG_MAX && nchild == 2) {
+                unsigned long wall_child = ULONG_MAX;
+                unsigned long other_child = ULONG_MAX;
+                for (unsigned short ci = 0; ci < nchild; ++ci) {
+                  const auto ch = nodes->GetChildren_CV(parent, ci);
+                  if (fine_grid->nodes->GetBoundary(ch))
+                    wall_child = ch;
+                  else
+                    other_child = ch;
+                }
+                if (wall_child != ULONG_MAX && other_child != ULONG_MAX) {
+                  std::cout << "[MG_IMPLICIT] node " << wall_child << " was agglomerated with node " << other_child
+                            << ", node " << wall_child << " has " << nchild << " children" << std::endl;
+                } else {
+                  std::cout << "[MG_IMPLICIT] parent " << parent << " children do not match expected (wall + child)"
+                            << std::endl;
+                }
+              }
+            }
+          } else {
+            cout << "neighbor " << jPoint << " not agglomerated!!!!!!!!!!!!!!!!!!!" << endl;
+          }
+          if (!fine_grid->nodes->GetDomain(jPoint)) continue;
+          if (fine_grid->nodes->GetBoundary(jPoint)) continue; /* avoid boundary nodes */
+
+          /* compute direction */
+          su2double vec[MAXNDIM] = {0.0};
+          su2double len = 0.0;
+          for (auto d = 0u; d < nDim; ++d) {
+            vec[d] = fine_grid->nodes->GetCoord(jPoint, d) - fine_grid->nodes->GetCoord(current, d);
+            len += vec[d] * vec[d];
+          }
+          if (len <= 0.0) continue;
+          len = sqrt(len);
+          for (auto d = 0u; d < nDim; ++d) vec[d] /= len;
+
+          /* alignment with prev_dir */
+          su2double dot = 0.0;
+          for (auto d = 0u; d < nDim; ++d) dot += vec[d] * prev_dir[d];
+          if (dot > best_dot) {
+            best_dot = dot;
+            best_neighbor = jPoint;
+          }
+        }
+
+        if (best_neighbor == ULONG_MAX) break;
+        if (best_dot < cos_threshold) break; /* deviation too large */
+
+        /* append neighbor */
+        L.push_back(best_neighbor);
+
+        /* update prev_dir */
+        su2double dir_new[MAXNDIM] = {0.0};
+        su2double len_new = 0.0;
+        for (auto d = 0u; d < nDim; ++d) {
+          dir_new[d] = fine_grid->nodes->GetCoord(best_neighbor, d) - fine_grid->nodes->GetCoord(current, d);
+          len_new += dir_new[d] * dir_new[d];
+        }
+        if (len_new <= 0.0) break;
+        len_new = sqrt(len_new);
+        for (auto d = 0u; d < nDim; ++d) prev_dir[d] = dir_new[d] / len_new;
+
+        current = best_neighbor;
+      }
+
+      /* accept line only if it contains at least two interior nodes (i.e., length >= 3 incl. wall) */
+      if (L.size() >= 3) {
+        if (debug) {
+          std::cout << "[MG_IMPLICIT] Found implicit line starting at wall " << iPoint << " : ";
+          for (auto pid : L) {
+            std::cout << pid << " ";
+          }
+          std::cout << " (len=" << L.size() << ")\n";
+        }
+        lines.push_back(L);
+        wall_nodes.push_back(iPoint);
       }
     }
   }
 
-  /*--- Remove duplicates from the final list of Suitable Indirect Neighbors. ---*/
+  if (lines.empty()) return;
+
+  /*--- Advancing-front greedy pairing with cross-line merging:
+       For each pair position (first+second, then third+fourth...), attempt to
+       merge pairs across lines when their wall seeds share the same parent.
+       If no neighbor-line match exists, create a 2-child coarse CV for the pair. ---*/
+  unsigned pair_idx = 0;
+  bool done_stage = false;
+  while (!done_stage) {
+    done_stage = true;
+
+    // map wall parent -> list of line indices that start with a wall node having that parent
+    unordered_map<unsigned long, vector<unsigned long>> parent_to_lines;
+    parent_to_lines.reserve(lines.size() * 2);
+    for (unsigned long li = 0; li < lines.size(); ++li) {
+      const auto& L = lines[li];
+      if (L.empty()) continue;
+      const auto W = L[0];
+      const auto pW = fine_grid->nodes->GetParent_CV(W);
+      parent_to_lines[pW].push_back(li);
+    }
+
+    // track which lines had a pair processed this stage
+    vector<char> line_processed(lines.size(), 0);
+
+    // First, attempt cross-line merges for parents that have >1 wall-line
+    for (auto& entry : parent_to_lines) {
+      const auto& line_ids = entry.second;
+      if (line_ids.size() < 2) continue;
+
+      // pair up lines in consecutive order (0 with 1, 2 with 3, ...)
+      for (size_t k = 0; k + 1 < line_ids.size(); k += 2) {
+        const auto li1 = line_ids[k];
+        const auto li2 = line_ids[k + 1];
+        if (line_processed[li1] || line_processed[li2]) continue;
+
+        const auto& L1 = lines[li1];
+        const auto& L2 = lines[li2];
+        const auto idx1 = 1 + 2 * pair_idx;
+        const auto idx2 = idx1 + 1;
+        if (L1.size() <= idx2 || L2.size() <= idx2) continue;  // both must have the pair at this stage
+
+        const auto a = L1[idx1];
+        const auto b = L1[idx2];
+        const auto c = L2[idx1];
+        const auto d = L2[idx2];
+
+        // skip if any already agglomerated or reserved
+        if (fine_grid->nodes->GetAgglomerate(a) || fine_grid->nodes->GetAgglomerate(b) ||
+            fine_grid->nodes->GetAgglomerate(c) || fine_grid->nodes->GetAgglomerate(d))
+          continue;
+        if (reserved[a] || reserved[b] || reserved[c] || reserved[d]) continue;
+
+        // geometrical checks for all
+        if (!GeometricalCheck(a, fine_grid, config) || !GeometricalCheck(b, fine_grid, config) ||
+            !GeometricalCheck(c, fine_grid, config) || !GeometricalCheck(d, fine_grid, config))
+          continue;
+
+        // Check for duplicate indices — guard against merging the same node twice.
+        if (a == b || a == c || a == d || b == c || b == d || c == d) {
+          if (debug) {
+            std::cout << "[MG_IMPLICIT] Duplicate indices detected in CROSS-LINE merge at stage " << pair_idx
+                      << " lines " << li1 << "," << li2 << " : indices = " << a << "," << b << "," << c << "," << d
+                      << "\n";
+            std::cout << "[MG_IMPLICIT] Parents: " << fine_grid->nodes->GetParent_CV(a) << ","
+                      << fine_grid->nodes->GetParent_CV(b) << "," << fine_grid->nodes->GetParent_CV(c) << ","
+                      << fine_grid->nodes->GetParent_CV(d) << "\n";
+            std::cout << "[MG_IMPLICIT] Reserved flags: " << reserved[a] << "," << reserved[b] << "," << reserved[c]
+                      << "," << reserved[d] << "\n";
+          } else {
+            std::cout
+                << "[MG_IMPLICIT] Duplicate indices detected in CROSS-LINE merge; skipping merge for Index_CoarseCV="
+                << Index_CoarseCV << "\n";
+          }
+
+          // Stop implicit-line agglomeration for other lines that share this parent
+          // so only one wall-line per parent is processed (prevents later conflicts).
+          for (auto other_li : entry.second) line_processed[other_li] = 1;
 
-  sort(Suitable_Indirect_Neighbors.begin(), Suitable_Indirect_Neighbors.end());
-  auto it2 = unique(Suitable_Indirect_Neighbors.begin(), Suitable_Indirect_Neighbors.end());
-  Suitable_Indirect_Neighbors.resize(it2 - Suitable_Indirect_Neighbors.begin());
+          continue;
+        }
+
+        // create a new coarse control volume merging {a,b,c,d}
+        if (debug) {
+          std::cout << "[MG_IMPLICIT] Stage " << pair_idx << ": CROSS-LINE merge creating coarse CV " << Index_CoarseCV
+                    << " from points ";
+          std::cout << a << "," << b << "," << c << "," << d << std::endl;
+        }
+        fine_grid->nodes->SetParent_CV(a, Index_CoarseCV);
+        nodes->SetChildren_CV(Index_CoarseCV, 0, a);
+        fine_grid->nodes->SetParent_CV(b, Index_CoarseCV);
+        nodes->SetChildren_CV(Index_CoarseCV, 1, b);
+        fine_grid->nodes->SetParent_CV(c, Index_CoarseCV);
+        nodes->SetChildren_CV(Index_CoarseCV, 2, c);
+        fine_grid->nodes->SetParent_CV(d, Index_CoarseCV);
+        nodes->SetChildren_CV(Index_CoarseCV, 3, d);
+        nodes->SetnChildren_CV(Index_CoarseCV, 4);
+
+        reserved[a] = reserved[b] = reserved[c] = reserved[d] = 1;
+        MGQueue_InnerCV.RemoveCV(a);
+        MGQueue_InnerCV.RemoveCV(b);
+        MGQueue_InnerCV.RemoveCV(c);
+        MGQueue_InnerCV.RemoveCV(d);
+
+        Index_CoarseCV++;
+        line_processed[li1] = line_processed[li2] = 1;
+        // mark all other lines sharing this parent as processed (stop their implicit agglomeration)
+        for (auto other_li : line_ids)
+          if (other_li != li1 && other_li != li2) line_processed[other_li] = 1;
+        done_stage = false;
+      }
+    }
+
+    // Second, for remaining lines, create 2-child coarse CVs for that stage
+    for (unsigned long li = 0; li < lines.size(); ++li) {
+      if (line_processed[li]) continue;
+      const auto& L = lines[li];
+      const auto idx1 = 1 + 2 * pair_idx;
+      const auto idx2 = idx1 + 1;
+      if (L.size() <= idx2) continue;  // no pair at this stage
+
+      const auto a = L[idx1];
+      const auto b = L[idx2];
+      if (fine_grid->nodes->GetAgglomerate(a) || fine_grid->nodes->GetAgglomerate(b)) continue;
+      if (reserved[a] || reserved[b]) continue;
+      if (!GeometricalCheck(a, fine_grid, config) || !GeometricalCheck(b, fine_grid, config)) continue;
+
+      if (debug) {
+        std::cout << "[MG_IMPLICIT] Stage " << pair_idx << ": 2-child merge creating coarse CV " << Index_CoarseCV
+                  << " from points " << a << "," << b << std::endl;
+      }
+
+      // create 2-child coarse CV
+      fine_grid->nodes->SetParent_CV(a, Index_CoarseCV);
+      nodes->SetChildren_CV(Index_CoarseCV, 0, a);
+      fine_grid->nodes->SetParent_CV(b, Index_CoarseCV);
+      nodes->SetChildren_CV(Index_CoarseCV, 1, b);
+      nodes->SetnChildren_CV(Index_CoarseCV, 2);
+
+      reserved[a] = reserved[b] = 1;
+      MGQueue_InnerCV.RemoveCV(a);
+      MGQueue_InnerCV.RemoveCV(b);
+
+      Index_CoarseCV++;
+      done_stage = false;
+    }
+
+    pair_idx++;
+    // stop when no more pairs available at this stage across any line
+    bool any_more = false;
+    for (const auto& L : lines) {
+      if (L.size() > 1 + 2 * pair_idx) {
+        any_more = true;
+        break;
+      }
+    }
+    if (!any_more) break;
+  }
+
+  /* NOTE: cross-line / neighbor-line merging (when wall nodes were agglomerated together)
+     is not implemented here yet. This function implements the advancing-front greedy
+     pairing along implicit lines as a first pass. */
 }
 
 void CMultiGridGeometry::SetPoint_Connectivity(const CGeometry* fine_grid) {
   /*--- Temporary, CPoint (nodes) then compresses this structure. ---*/
-  vector<vector<unsigned long> > points(nPoint);
+  vector<vector<unsigned long>> points(nPoint);
 
   for (auto iCoarsePoint = 0ul; iCoarsePoint < nPoint; iCoarsePoint++) {
     /*--- For each child CV (of the fine grid), ---*/
@@ -1089,3 +1880,168 @@ void CMultiGridGeometry::FindNormal_Neighbor(const CConfig* config) {
     }
   }
 }
+
+su2double CMultiGridGeometry::ComputeLocalCurvature(const CGeometry* fine_grid, unsigned long iPoint,
+                                                    unsigned short iMarker) const {
+  /*--- Compute local curvature (maximum angle between adjacent face normals) at a boundary vertex.
+        This is used to determine if agglomeration is safe based on a curvature threshold. ---*/
+
+  /*--- Get the vertex index for this point on this marker ---*/
+  long iVertex = fine_grid->nodes->GetVertex(iPoint, iMarker);
+  if (iVertex < 0) return 0.0;  // Point not on this marker
+
+  /*--- Get the normal at this vertex ---*/
+  su2double Normal_i[MAXNDIM] = {0.0};
+  fine_grid->vertex[iMarker][iVertex]->GetNormal(Normal_i);
+  su2double Area_i = GeometryToolbox::Norm(int(nDim), Normal_i);
+
+  if (Area_i < 1e-12) return 0.0;  // Skip degenerate vertices
+
+  /*--- Normalize the normal ---*/
+  for (unsigned short iDim = 0; iDim < nDim; iDim++) {
+    Normal_i[iDim] /= Area_i;
+  }
+
+  /*--- Find maximum angle with neighboring vertices on the same marker ---*/
+  su2double max_angle = 0.0;
+
+  /*--- Loop over edges connected to this point ---*/
+  for (unsigned short iEdge = 0; iEdge < fine_grid->nodes->GetnPoint(iPoint); iEdge++) {
+    unsigned long jPoint = fine_grid->nodes->GetPoint(iPoint, iEdge);
+
+    /*--- Check if neighbor is also on this marker ---*/
+    long jVertex = fine_grid->nodes->GetVertex(jPoint, iMarker);
+    if (jVertex < 0) continue;  // Not on this marker
+
+    /*--- Get normal at neighbor vertex ---*/
+    su2double Normal_j[MAXNDIM] = {0.0};
+    fine_grid->vertex[iMarker][jVertex]->GetNormal(Normal_j);
+    su2double Area_j = GeometryToolbox::Norm(int(nDim), Normal_j);
+
+    if (Area_j < 1e-12) continue;  // Skip degenerate neighbor
+
+    /*--- Normalize the neighbor normal ---*/
+    for (unsigned short iDim = 0; iDim < nDim; iDim++) {
+      Normal_j[iDim] /= Area_j;
+    }
+
+    /*--- Compute dot product: cos(angle) = n_i · n_j ---*/
+    su2double dot_product = 0.0;
+    for (unsigned short iDim = 0; iDim < nDim; iDim++) {
+      dot_product += Normal_i[iDim] * Normal_j[iDim];
+    }
+
+    /*--- Clamp to [-1, 1] to avoid numerical issues with acos ---*/
+    dot_product = max(-1.0, min(1.0, dot_product));
+
+    /*--- Compute angle in degrees ---*/
+    su2double angle_rad = acos(dot_product);
+    su2double angle_deg = angle_rad * 180.0 / PI_NUMBER;
+
+    /*--- Track maximum angle ---*/
+    max_angle = max(max_angle, angle_deg);
+  }
+
+  return max_angle;
+}
+
+void CMultiGridGeometry::ValidateHaloCoordinates(const CConfig* config, unsigned short iMesh) const {
+#ifdef HAVE_MPI
+
+  int size = SU2_MPI::GetSize();
+  int rank = SU2_MPI::GetRank();
+
+  if (size == SINGLE_NODE || !config->GetMG_DebugHaloCoordinates()) {
+    return;  // Skip if single-node or debug option disabled
+  }
+
+  if (rank == MASTER_NODE) {
+    cout << "\n--- MG Halo Coordinate Validation (Level " << iMesh << ") ---" << endl;
+  }
+
+  /*--- For each SEND_RECEIVE marker pair, exchange coordinates and validate ---*/
+  for (auto iMarker = 0u; iMarker < config->GetnMarker_All(); iMarker++) {
+    if ((config->GetMarker_All_KindBC(iMarker) == SEND_RECEIVE) && (config->GetMarker_All_SendRecv(iMarker) > 0)) {
+      const auto MarkerS = iMarker;
+      const auto MarkerR = iMarker + 1;
+
+      const auto send_to = config->GetMarker_All_SendRecv(MarkerS) - 1;
+      const auto receive_from = abs(config->GetMarker_All_SendRecv(MarkerR)) - 1;
+
+      const auto nVertexS = nVertex[MarkerS];
+      const auto nVertexR = nVertex[MarkerR];
+
+      /*--- Allocate buffers for coordinate exchange ---*/
+      vector<su2double> Buffer_Send_Coord(nVertexS * nDim);
+      vector<su2double> Buffer_Receive_Coord(nVertexR * nDim);
+
+      /*--- Pack SEND coordinates (domain CVs being sent) ---*/
+      for (auto iVertex = 0ul; iVertex < nVertexS; iVertex++) {
+        const auto iPoint = vertex[MarkerS][iVertex]->GetNode();
+        const auto* Coord = nodes->GetCoord(iPoint);
+        for (auto iDim = 0u; iDim < nDim; iDim++) {
+          Buffer_Send_Coord[iVertex * nDim + iDim] = Coord[iDim];
+        }
+      }
+
+      /*--- Exchange coordinates ---*/
+      SU2_MPI::Sendrecv(Buffer_Send_Coord.data(), nVertexS * nDim, MPI_DOUBLE, send_to, 0,
+                        Buffer_Receive_Coord.data(), nVertexR * nDim, MPI_DOUBLE, receive_from, 0,
+                        SU2_MPI::GetComm(), MPI_STATUS_IGNORE);
+
+      /*--- Validate RECEIVE coordinates against local halo CVs ---*/
+      unsigned long nMismatch = 0;
+      su2double maxError = 0.0;
+      su2double tolerance = 1e-10;
+
+      for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
+        const auto iPoint = vertex[MarkerR][iVertex]->GetNode();
+        const auto* Coord_Local = nodes->GetCoord(iPoint);
+
+        su2double error = 0.0;
+        for (auto iDim = 0u; iDim < nDim; iDim++) {
+          su2double coord_remote = Buffer_Receive_Coord[iVertex * nDim + iDim];
+          su2double diff = fabs(Coord_Local[iDim] - coord_remote);
+          error += diff * diff;
+        }
+        error = sqrt(error);
+
+        if (error > tolerance) {
+          nMismatch++;
+          maxError = max(maxError, error);
+
+          if (nMismatch <= 5) {  // Only print first 5 mismatches
+            cout << "COORD MISMATCH [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+                 << ", Vertex " << iVertex << ", Point " << iPoint << "]: ";
+            cout << "Local=(";
+            for (auto iDim = 0u; iDim < nDim; iDim++) {
+              cout << Coord_Local[iDim];
+              if (iDim < nDim - 1) cout << ", ";
+            }
+            cout << "), Remote=(";
+            for (auto iDim = 0u; iDim < nDim; iDim++) {
+              cout << Buffer_Receive_Coord[iVertex * nDim + iDim];
+              if (iDim < nDim - 1) cout << ", ";
+            }
+            cout << "), Error=" << error << endl;
+          }
+        }
+      }
+
+      if (nMismatch > 0) {
+        cout << "WARNING [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+             << "]: " << nMismatch << " coordinate mismatches detected (max error: "
+             << maxError << ")" << endl;
+      } else if (nVertexR > 0) {
+        cout << "INFO [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
+             << "]: All " << nVertexR << " halo CV coordinates match (tol=" << tolerance << ")" << endl;
+      }
+    }
+  }
+
+  if (rank == MASTER_NODE) {
+    cout << "--- End MG Halo Coordinate Validation ---\n" << endl;
+  }
+
+#endif  // HAVE_MPI
+}
diff --git a/Common/src/geometry/dual_grid/CPoint.cpp b/Common/src/geometry/dual_grid/CPoint.cpp
index 2d234b88c5e6..0851de19f6eb 100644
--- a/Common/src/geometry/dual_grid/CPoint.cpp
+++ b/Common/src/geometry/dual_grid/CPoint.cpp
@@ -25,6 +25,7 @@
  * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <climits>
 #include "../../../include/geometry/dual_grid/CPoint.hpp"
 #include "../../../include/CConfig.hpp"
 #include "../../../include/parallelization/omp_structure.hpp"
@@ -73,7 +74,7 @@ void CPoint::FullAllocation(unsigned short imesh, const CConfig* config) {
 
   /*--- Multigrid structures. ---*/
   if (config->GetnMGLevels() > 0) {
-    Parent_CV.resize(npoint) = 0;
+    Parent_CV.resize(npoint) = ULONG_MAX;  // Initialize to ULONG_MAX to indicate unagglomerated
     Agglomerate.resize(npoint) = false;
     Agglomerate_Indirect.resize(npoint) = false;
     /*--- The finest grid does not have children CV's. ---*/
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index 4414645385ad..58dcc676e9a1 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -847,6 +847,7 @@ void CSysMatrix<ScalarType>::ComputeLU_SGSPreconditioner(const CSysVector<Scalar
 
     for (auto iPoint = begin; iPoint < end; ++iPoint) {
       auto idx = iPoint * nVar;
+
       LowerProduct(prod, iPoint, begin, low_prod);         // Compute L.x*
       VectorSubtraction(&vec[idx], low_prod, &prod[idx]);  // Compute y = b - L.x*
       Gauss_Elimination(iPoint, &prod[idx]);               // Solve D.x* = y
@@ -873,6 +874,7 @@ void CSysMatrix<ScalarType>::ComputeLU_SGSPreconditioner(const CSysVector<Scalar
     for (auto iPoint = row_end; iPoint > begin;) {
       iPoint--;  // because of unsigned type
       auto idx = iPoint * nVar;
+
       DiagonalProduct(prod, iPoint, dia_prod);           // Compute D.x*
       UpperProduct(prod, iPoint, row_end, up_prod);      // Compute U.x_(n+1)
       VectorSubtraction(dia_prod, up_prod, &prod[idx]);  // Compute y = D.x*-U.x_(n+1)
diff --git a/QuickStart/inv_NACA0012.cfg b/QuickStart/inv_NACA0012.cfg
index f0859e250869..5507dfacc849 100644
--- a/QuickStart/inv_NACA0012.cfg
+++ b/QuickStart/inv_NACA0012.cfg
@@ -105,7 +105,7 @@ CFL_ADAPT= NO
 CFL_ADAPT_PARAM= ( 0.1, 2.0, 10.0, 1e10 )
 %
 % Number of total iterations
-ITER= 250
+ITER= 5
 
 % ------------------------ LINEAR SOLVER DEFINITION ---------------------------%
 %
@@ -136,13 +136,22 @@ MG_PRE_SMOOTH= ( 1, 2, 3, 3 )
 MG_POST_SMOOTH= ( 0, 0, 0, 0 )
 %
 % Jacobi implicit smoothing of the correction
-MG_CORRECTION_SMOOTH= ( 0, 0, 0, 0 )
+MG_CORRECTION_SMOOTH= ( 0, 3, 3, 3 )
 %
 % Damping factor for the residual restriction
 MG_DAMP_RESTRICTION= 1.0
 %
 % Damping factor for the correction prolongation
 MG_DAMP_PROLONGATION= 1.0
+%
+% Enable early exit for multigrid smoothing iterations
+MG_SMOOTH_EARLY_EXIT= YES
+%
+% RMS residual threshold for early exit (fraction of initial RMS)
+MG_SMOOTH_RES_THRESHOLD= 0.99
+%
+% Enable output of RMS residual during smoothing iterations
+MG_SMOOTH_OUTPUT= YES
 
 % -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
 %
diff --git a/SU2_CFD/include/integration/CMultiGridIntegration.hpp b/SU2_CFD/include/integration/CMultiGridIntegration.hpp
index d44eb4c30927..07a4e61c07f5 100644
--- a/SU2_CFD/include/integration/CMultiGridIntegration.hpp
+++ b/SU2_CFD/include/integration/CMultiGridIntegration.hpp
@@ -68,7 +68,8 @@ class CMultiGridIntegration final : public CIntegration {
   void MultiGrid_Cycle(CGeometry ****geometry, CSolver *****solver_container,
                        CNumerics ******numerics_container, CConfig **config,
                        unsigned short iMesh, unsigned short mu, unsigned short RunTime_EqSystem,
-                       unsigned short iZone, unsigned short iInst);
+                       unsigned short iZone, unsigned short iInst,
+                       unsigned short *lastPreSmoothIters);
 
   /*!
    * \brief Compute the forcing term.
@@ -79,7 +80,8 @@ class CMultiGridIntegration final : public CIntegration {
    * \param[in] config - Definition of the particular problem.
    */
   void SetForcing_Term(CSolver *sol_fine, CSolver *sol_coarse, CGeometry *geo_fine,
-                       CGeometry *geo_coarse, CConfig *config, unsigned short iMesh);
+                       CGeometry *geo_coarse, CConfig *config, unsigned short iMesh,
+                       su2double val_factor = -1.0);
 
   /*!
    * \brief Add the truncation error to the residual.
@@ -133,6 +135,30 @@ class CMultiGridIntegration final : public CIntegration {
   void SetProlongated_Solution(unsigned short RunTime_EqSystem, CSolver *sol_fine, CSolver *sol_coarse,
                                CGeometry *geo_fine, CGeometry *geo_coarse, CConfig *config);
 
+
+  void PostSmoothing(unsigned short RunTime_EqSystem,
+  CSolver* solver_fine,
+  CNumerics** numerics_fine,
+  CGeometry* geometry_fine,
+  CSolver** solver_container_fine,
+  CConfig *config,
+  unsigned short iMesh,
+  unsigned short iRKLimit);
+
+  void PreSmoothing(unsigned short RunTime_EqSystem,
+  CGeometry**** geometry,
+  CSolver***** solver_container,
+  CConfig **config_container,
+  CSolver* solver_fine,
+  CNumerics** numerics_fine,
+  CGeometry* geometry_fine,
+  CSolver** solver_container_fine,
+  CConfig *config,
+  unsigned short iMesh,
+  unsigned short iZone,
+  unsigned short iRKLimit,
+  unsigned short *lastPreSmoothIters);
+
   /*!
    * \brief Compute the fine grid correction from the coarse solution.
    * \param[out] sol_fine - Pointer to the solution on the fine grid.
@@ -154,7 +180,7 @@ class CMultiGridIntegration final : public CIntegration {
    * \param[in] config - Definition of the particular problem.
    */
   void SmoothProlongated_Correction(unsigned short RunTime_EqSystem, CSolver *solver, CGeometry *geometry,
-                                    unsigned short val_nSmooth, su2double val_smooth_coeff, CConfig *config);
+                                    unsigned short val_nSmooth, su2double val_smooth_coeff, CConfig *config, unsigned short iMesh);
 
   /*!
    * \brief Restrict solution from fine grid to a coarse grid.
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
index 362e2d51c4b0..0274ef26a92b 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
@@ -578,7 +578,7 @@ class CFVMFlowSolverBase : public CSolver {
 
     }
 
-    /*--- Recompute the unsteady time step for the dual time strategy if the unsteady CFL is diferent from 0.
+    /*--- Recompute the unsteady time step for the dual time strategy if the unsteady CFL is different from 0.
      * This is only done once because in dual time the time step cannot be variable. ---*/
 
     if (dual_time && (Iteration == config->GetRestart_Iter()) && (config->GetUnst_CFL() != 0.0) && (iMesh == MESH_0)) {
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
index 8dc0e4b63931..5ea610abe4af 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -1130,6 +1130,35 @@ void CFVMFlowSolverBase<V, FlowRegime>::BC_Sym_Plane(CGeometry* geometry, CSolve
     /*--- Halo points do not need to be considered. ---*/
     if (!geometry->nodes->GetDomain(iPoint)) continue;
 
+    /*--- On coarse multigrid levels with agglomerated Euler walls, skip flux computation
+     *    and directly zero momentum residuals to avoid errors from averaged normals. ---*/
+    if (config->GetMarker_All_KindBC(val_marker) == EULER_WALL && geometry->GetMGLevel() > MESH_0) {
+      for (auto iDim = 0u; iDim < nDim; iDim++) {
+        LinSysRes(iPoint, iVel + iDim) = 0.0;
+      }
+      if (implicit) {
+        /*--- Zero momentum rows in Jacobian ---*/
+        for (auto iDim = 0u; iDim < nDim; iDim++) {
+          auto* block = Jacobian.GetBlock(iPoint, iPoint);
+          for (auto jVar = 0u; jVar < nVar; jVar++) {
+            block[(iVel + iDim) * nVar + jVar] = 0.0;
+          }
+          block[(iVel + iDim) * nVar + (iVel + iDim)] = 1.0;  // Diagonal
+        }
+        /*--- Zero momentum columns in off-diagonal blocks ---*/
+        for (size_t iNeigh = 0; iNeigh < geometry->nodes->GetnPoint(iPoint); ++iNeigh) {
+          auto jPoint = geometry->nodes->GetPoint(iPoint, iNeigh);
+          auto* block = Jacobian.GetBlock(iPoint, jPoint);
+          for (auto iDim = 0u; iDim < nDim; iDim++) {
+            for (auto jVar = 0u; jVar < nVar; jVar++) {
+              block[(iVel + iDim) * nVar + jVar] = 0.0;
+            }
+          }
+        }
+      }
+      continue;  // Skip normal flux computation
+    }
+
     /*--- Get the normal of the current symmetry. This may be the original normal of the vertex
      * or a modified normal if there are intersecting symmetries. ---*/
 
diff --git a/SU2_CFD/include/solvers/CScalarSolver.inl b/SU2_CFD/include/solvers/CScalarSolver.inl
index 01f05939a2c5..02886968c68f 100644
--- a/SU2_CFD/include/solvers/CScalarSolver.inl
+++ b/SU2_CFD/include/solvers/CScalarSolver.inl
@@ -132,7 +132,7 @@ void CScalarSolver<VariableType>::Upwind_Residual(CGeometry* geometry, CSolver**
   /*--- Define booleans that are solver specific through CConfig's GlobalParams which have to be set in CFluidIteration
    * before calling these solver functions. ---*/
   const bool implicit = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT);
-  const bool muscl = config->GetMUSCL();
+  const bool muscl = config->GetMUSCL() && (iMesh == MESH_0);
   const bool limiter = (config->GetKind_SlopeLimit() != LIMITER::NONE) &&
                        (config->GetInnerIter() <= config->GetLimiterIter());
 
diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp
index 32b7bf43c08e..f5b8f2af1c58 100644
--- a/SU2_CFD/include/solvers/CSolver.hpp
+++ b/SU2_CFD/include/solvers/CSolver.hpp
@@ -37,6 +37,8 @@
 #include <algorithm>
 #include <iostream>
 #include <set>
+#include <deque>
+#include <utility>
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -79,6 +81,14 @@ class CSolver {
   vector<su2double> NonLinRes_Series; /*!< \brief Vector holding the nonlinear residual indicator series. */
   su2double Old_Func,  /*!< \brief Old value of the nonlinear residual indicator. */
   New_Func;            /*!< \brief Current value of the nonlinear residual indicator. */
+
+  /*--- Adaptive CFL State Variables ---*/
+  unsigned long consecutiveReduceIters;   /*!< \brief Counter for consecutive iterations requiring CFL reduction. */
+  unsigned long consecutiveIncreaseIters; /*!< \brief Counter for consecutive iterations allowing CFL increase. */
+  unsigned long oscillationCounter;       /*!< \brief Counter for oscillation detection. */
+  bool previousOscillation;               /*!< \brief Flag for previous oscillation state. */
+  std::deque<std::pair<unsigned long, su2double>> peaks;   /*!< \brief History of residual peaks. */
+  std::deque<std::pair<unsigned long, su2double>> valleys; /*!< \brief History of residual valleys. */
   unsigned short nVar,           /*!< \brief Number of variables of the problem. */
   nPrimVar,                      /*!< \brief Number of primitive variables of the problem. */
   nPrimVarGrad,                  /*!< \brief Number of primitive variables of the problem in the gradient computation. */
@@ -1433,6 +1443,63 @@ class CSolver {
    */
   void AdaptCFLNumber(CGeometry **geometry, CSolver ***solver_container, CConfig *config);
 
+private:
+
+  /*! \brief Data structures for modular CFL adaptation */
+
+  struct CFLAdaptParams {
+    su2double CFLFactorDecrease, CFLFactorIncrease;
+    su2double CFLMin, CFLMax;
+    su2double acceptableLinTol, startingIter;
+    unsigned long Res_Count, stallWindow;
+  };
+
+  su2double ComputeAdjustedCFL(su2double currentCFL, su2double underRelaxation,
+                                bool reduceCFL, bool resetCFL, bool canIncrease,
+                                unsigned long iter, su2double startingIter,
+                                su2double CFLMin, su2double CFLMax,
+                                su2double CFLFactorDecrease, su2double CFLFactorIncrease);
+
+  /*! \brief Helper methods for modular CFL adaptation */
+
+  CFLAdaptParams InitializeCFLAdaptParams(CConfig *config);
+
+  su2double ComputeMaxLinearResidual(CSolver *solverFlow, CSolver *solverTurb, CSolver *solverSpecies);
+
+  bool DetectFlipFlop(const CFLAdaptParams &params, CConfig *config);
+
+  void DetermineLinearSolverBasedCFLFlags(const CFLAdaptParams &params, CConfig *config,
+                                          su2double linRes, su2double linTol,
+                                          bool &reduceCFL, bool &resetCFL, bool &canIncrease);
+
+  void TrackResidualHistory(const CFLAdaptParams &params, CConfig *config,
+                            CSolver **solver_container, unsigned short iMesh,
+                            su2double &New_Func, su2double &Old_Func,
+                            bool &reduceCFL, bool &resetCFL);
+
+  void DetectFastDivergence(const CFLAdaptParams &params, CConfig *config,
+                            su2double New_Func, su2double Old_Func,
+                            bool &reduceCFL, bool &resetCFL);
+
+  void DetectPeakValley(const CFLAdaptParams &params, CConfig *config,
+                        su2double New_Func, bool &reduceCFL, bool &resetCFL);
+
+  void ApplyCFLToAllPoints(CGeometry *geometry, CSolver **solver_container,
+                           CConfig *config, unsigned short iMesh,
+                           bool reduceCFL, bool resetCFL, bool canIncrease,
+                           su2double startingIter);
+
+  void PerformCFLReductions(CGeometry *geometry, CConfig *config, unsigned short iMesh);
+
+  void SynchronizeCFLFlags(bool &reduceCFL, bool &resetCFL, bool &canIncrease);
+
+  void ApplyCFLToCoarseGrid(CGeometry *geometry, CSolver **solver_container,
+                           CConfig *config, unsigned short iMesh,
+                           bool reduceCFL, bool resetCFL, bool canIncrease,
+                           su2double startingIter);
+
+public:
+
   /*!
    * \brief Reset the local CFL adaption variables
    */
@@ -4350,8 +4417,8 @@ class CSolver {
       }
       const su2double scale = 1 / geoCoarse.nodes->GetVolume(iPointCoarse);
 
-      for (auto iChildren = 0ul; iChildren < geoCoarse.nodes->GetnChildren_CV(iPointCoarse); ++iChildren) {
-        const auto iPointFine = geoCoarse.nodes->GetChildren_CV(iPointCoarse, iChildren);
+      for (auto iChild = 0ul; iChild < geoCoarse.nodes->GetnChildren_CV(iPointCoarse); ++iChild) {
+        const auto iPointFine = geoCoarse.nodes->GetChildren_CV(iPointCoarse, iChild);
         const su2double w = geoFine.nodes->GetVolume(iPointFine) * scale;
         for (auto iVar = 0ul; iVar < varsCoarse.cols(); ++iVar) {
           varsCoarse(iPointCoarse, iVar) += w * varsFine(iPointFine, iVar);
diff --git a/SU2_CFD/src/drivers/CDriver.cpp b/SU2_CFD/src/drivers/CDriver.cpp
index 7e17ed9c6015..01d7048580cb 100644
--- a/SU2_CFD/src/drivers/CDriver.cpp
+++ b/SU2_CFD/src/drivers/CDriver.cpp
@@ -105,6 +105,33 @@
 #endif
 #include <cfenv>
 
+// Compute the runtime effective number of multigrid levels for a zone.
+// This helper does the MPI Allreduce of the local point count and returns
+// the effective number of MG levels using the same algorithm used elsewhere
+// in the codebase. It returns 0 if multigrid is disabled by the user.
+static unsigned short getEffectiveMGLevels(CConfig* config, unsigned long local_nPoint, unsigned short nDim, unsigned long &Global_nPointFine) {
+  const unsigned short user_req = config->GetnMGLevels();
+  if (user_req == 0) return 0; // user explicitly disabled multigrid
+
+  Global_nPointFine = 0ul;
+  SU2_MPI::Allreduce(&local_nPoint, &Global_nPointFine, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
+
+  const unsigned long mg_min = config->GetMG_Min_MeshSize();
+  const unsigned short reduction = (nDim == 2) ? 4u : 8u;
+
+  unsigned short computed_max_levels = 1;
+  if ((mg_min > 0) && (Global_nPointFine > mg_min)) {
+    const double ratio = static_cast<double>(Global_nPointFine) / static_cast<double>(mg_min);
+    const double levels = floor(log(ratio) / log(static_cast<double>(reduction)));
+    computed_max_levels = (levels < 1.0) ? 1 : static_cast<unsigned short>(levels);
+  }
+
+  unsigned short effective = user_req;
+  if (computed_max_levels < effective) effective = computed_max_levels;
+
+  return effective;
+}
+
 CDriver::CDriver(char* confFile, unsigned short val_nZone, SU2_Comm MPICommunicator, bool dummy_geo) :
 CDriverBase(confFile, val_nZone, MPICommunicator), StopCalc(false), fsi(false), fem_solver(false), dry_run(dummy_geo) {
 
@@ -701,6 +728,29 @@ void CDriver::InitializeGeometryFVM(CConfig *config, CGeometry **&geometry) {
 
   nDim = geometry_aux->GetnDim();
 
+  /*--- Compute effective MG levels from mesh size here so that we overwrite
+       the configured MG levels before any subsequent calls to
+       config->GetnMGLevels() (which are used to size arrays). ---*/
+  {
+    const unsigned short user_req = config->GetnMGLevels();
+    if (user_req != 0) {
+      unsigned long Global_nPointFine = 0ul;
+      const unsigned short effective = getEffectiveMGLevels(config, geometry_aux->GetnPoint(), nDim, Global_nPointFine);
+
+      if (effective != user_req) {
+        if (rank == MASTER_NODE) {
+          const unsigned long mg_min = config->GetMG_Min_MeshSize();
+          cout << "WARNING: MGLEVEL=" << user_req << " treated as maximum. "
+               << "Effective MG levels set to " << effective
+               << " (Global points=" << Global_nPointFine << ", MG_MIN_MESHSIZE=" << mg_min << ").\n";
+        }
+        config->SetMGLevels(effective);
+        requestedMGlevels = config->GetnMGLevels();
+      }
+    }
+  }
+
+
   /*--- Color the initial grid and set the send-receive domains (ParMETIS) ---*/
 
   geometry_aux->SetColorGrid_Parallel(config);
@@ -791,6 +841,8 @@ void CDriver::InitializeGeometryFVM(CConfig *config, CGeometry **&geometry) {
     geometry[MESH_0]->ComputeSurf_Curvature(config);
   }
 
+
+
   /*--- Compute the global surface areas for all markers. ---*/
 
   geometry[MESH_0]->ComputeSurfaceAreaCfgFile(config);
@@ -810,7 +862,7 @@ void CDriver::InitializeGeometryFVM(CConfig *config, CGeometry **&geometry) {
 
   geometry[MESH_0]->SetMGLevel(MESH_0);
   if ((config->GetnMGLevels() != 0) && (rank == MASTER_NODE))
-    cout << "Setting the multigrid structure." << endl;
+    cout << "Setting the multigrid structure. NMG="<< config->GetnMGLevels() << endl;
 
   /*--- Loop over all the new grid ---*/
 
@@ -829,6 +881,11 @@ void CDriver::InitializeGeometryFVM(CConfig *config, CGeometry **&geometry) {
     geometry[iMGlevel]->SetEdges();
     geometry[iMGlevel]->SetVertex(geometry[iMGlevel-1], config);
 
+    /*--- Validate halo CV coordinates if debug option enabled ---*/
+    if (auto* mg_geometry = dynamic_cast<CMultiGridGeometry*>(geometry[iMGlevel])) {
+      mg_geometry->ValidateHaloCoordinates(config, iMGlevel);
+    }
+
     /*--- Create the control volume structures ---*/
 
     geometry[iMGlevel]->SetControlVolume(geometry[iMGlevel-1], ALLOCATE);
diff --git a/SU2_CFD/src/integration/CFEM_DG_Integration.cpp b/SU2_CFD/src/integration/CFEM_DG_Integration.cpp
index 7a52df8bc60f..5946900a52cb 100644
--- a/SU2_CFD/src/integration/CFEM_DG_Integration.cpp
+++ b/SU2_CFD/src/integration/CFEM_DG_Integration.cpp
@@ -38,7 +38,7 @@ void CFEM_DG_Integration::SingleGrid_Iteration(CGeometry ****geometry,
                                                unsigned short iZone,
                                                unsigned short iInst) {
 
-  unsigned short iMesh, iStep, iLimit = 1;
+  unsigned short iMesh, iStep;
   unsigned short SolContainer_Position = config[iZone]->GetContainerPosition(RunTime_EqSystem);
   unsigned short FinestMesh = config[iZone]->GetFinestMesh();
 
@@ -60,12 +60,8 @@ void CFEM_DG_Integration::SingleGrid_Iteration(CGeometry ****geometry,
         complicated algorithm must be used to facilitate time accurate
         local time stepping.  Note that we are currently hard-coding
         the classical RK4 scheme. ---*/
-  bool useADER = false;
-  switch (config[iZone]->GetKind_TimeIntScheme()) {
-    case RUNGE_KUTTA_EXPLICIT: iLimit = config[iZone]->GetnRKStep(); break;
-    case CLASSICAL_RK4_EXPLICIT: iLimit = 4; break;
-    case ADER_DG: iLimit = 1; useADER = true; break;
-    case EULER_EXPLICIT: case EULER_IMPLICIT: iLimit = 1; break; }
+  unsigned short iLimit = config[iZone]->GetnRKStep();
+
 
   /*--- In case an unsteady simulation is carried out, it is possible that a
         synchronization time step is specified. If so, set the boolean
@@ -101,7 +97,7 @@ void CFEM_DG_Integration::SingleGrid_Iteration(CGeometry ****geometry,
           space and time integration are tightly coupled and cannot be treated
           segregatedly. Therefore a different function is called for ADER to
           carry out the space and time integration. ---*/
-    if( useADER ) {
+    if( config[iZone]->GetKind_TimeIntScheme() == ADER_DG ) {
       solver_container[iZone][iInst][iMesh][SolContainer_Position]->ADER_SpaceTimeIntegration(geometry[iZone][iInst][iMesh], solver_container[iZone][iInst][iMesh],
                                                                                               numerics_container[iZone][iInst][iMesh][SolContainer_Position],
                                                                                               config[iZone], iMesh, RunTime_EqSystem);
diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp
index 039fad933a4c..65322ff04e96 100644
--- a/SU2_CFD/src/integration/CIntegration.cpp
+++ b/SU2_CFD/src/integration/CIntegration.cpp
@@ -202,7 +202,6 @@ void CIntegration::Space_Integration(CGeometry *geometry,
 
 void CIntegration::Time_Integration(CGeometry *geometry, CSolver **solver_container, CConfig *config,
                                     unsigned short iRKStep, unsigned short RunTime_EqSystem) {
-
   unsigned short MainSolver = config->GetContainerPosition(RunTime_EqSystem);
 
   switch (config->GetKind_TimeIntScheme()) {
diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
index f2a391869093..0ed3d1ad9ab5 100644
--- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp
+++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
@@ -28,6 +28,15 @@
 #include "../../include/integration/CMultiGridIntegration.hpp"
 #include "../../../Common/include/parallelization/omp_structure.hpp"
 
+#include "ComputeLinSysResRMS.hpp"
+#include <algorithm>
+
+// Forward declaration of the adaptive damping helper so it can be used by
+// MultiGrid_Cycle before the helper's definition further down the file.
+static su2double Damp_Restric_Adapt(const unsigned short *lastPreSmoothIters,
+                                   unsigned short lastPreSmoothCount,
+                                   CConfig *config);
+
 
 CMultiGridIntegration::CMultiGridIntegration() : CIntegration() { }
 
@@ -104,8 +113,16 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry,
 
   /*--- Perform the Full Approximation Scheme multigrid ---*/
 
+  // Allocate per-MG-run storage for the number of pre-smoothing iterations
+  const unsigned short nMGLevels = config[iZone]->GetnMGLevels();
+  unsigned short *lastPreSmoothIters = new unsigned short[nMGLevels + 1];
+  for (unsigned short ii = 0; ii <= nMGLevels; ++ii) lastPreSmoothIters[ii] = 0;
+
   MultiGrid_Cycle(geometry, solver_container, numerics_container, config,
-                  FinestMesh, RecursiveParam, RunTime_EqSystem, iZone, iInst);
+                  FinestMesh, RecursiveParam, RunTime_EqSystem, iZone, iInst,
+                  lastPreSmoothIters);
+
+  delete [] lastPreSmoothIters;
 
 
   /*--- Computes primitive variables and gradients in the finest mesh (useful for the next solver (turbulence) and output ---*/
@@ -134,12 +151,12 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry,
                                             unsigned short RecursiveParam,
                                             unsigned short RunTime_EqSystem,
                                             unsigned short iZone,
-                                            unsigned short iInst) {
+                                            unsigned short iInst,
+                                            unsigned short *lastPreSmoothIters) {
 
   CConfig* config = config_container[iZone];
 
   const unsigned short Solver_Position = config->GetContainerPosition(RunTime_EqSystem);
-  const bool classical_rk4 = (config->GetKind_TimeIntScheme() == CLASSICAL_RK4_EXPLICIT);
   const bool implicit = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT);
 
   /*--- Shorter names to refer to fine grid entities. ---*/
@@ -151,71 +168,18 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry,
 
   /*--- Number of RK steps. ---*/
 
-  unsigned short iRKLimit = 1;
-
-  switch (config->GetKind_TimeIntScheme()) {
-    case RUNGE_KUTTA_EXPLICIT:
-      iRKLimit = config->GetnRKStep();
-      break;
-    case CLASSICAL_RK4_EXPLICIT:
-      iRKLimit = 4;
-      break;
-    case EULER_EXPLICIT:
-    case EULER_IMPLICIT:
-      iRKLimit = 1;
-      break;
-  }
-
-  /*--- Do a presmoothing on the grid iMesh to be restricted to the grid iMesh+1 ---*/
-
-  for (unsigned short iPreSmooth = 0; iPreSmooth < config->GetMG_PreSmooth(iMesh); iPreSmooth++) {
-
-    /*--- Time and space integration ---*/
-
-    for (unsigned short iRKStep = 0; iRKStep < iRKLimit; iRKStep++) {
-
-      /*--- Send-Receive boundary conditions, and preprocessing ---*/
-
-      solver_fine->Preprocessing(geometry_fine, solver_container_fine, config, iMesh, iRKStep, RunTime_EqSystem, false);
-
-
-      if (iRKStep == 0) {
-
-        /*--- Set the old solution ---*/
-
-        solver_fine->Set_OldSolution();
-
-        if (classical_rk4) solver_fine->Set_NewSolution();
-
-        /*--- Compute time step, max eigenvalue, and integration scheme (steady and unsteady problems) ---*/
-
-        solver_fine->SetTime_Step(geometry_fine, solver_container_fine, config, iMesh, config->GetTimeIter());
-
-        /*--- Restrict the solution and gradient for the adjoint problem ---*/
-
-        Adjoint_Setup(geometry, solver_container, config_container, RunTime_EqSystem, config->GetTimeIter(), iZone);
-
-      }
-
-      /*--- Space integration ---*/
-
-      Space_Integration(geometry_fine, solver_container_fine, numerics_fine, config, iMesh, iRKStep, RunTime_EqSystem);
-
-      /*--- Time integration, update solution using the old solution plus the solution increment ---*/
-
-      Time_Integration(geometry_fine, solver_container_fine, config, iRKStep, RunTime_EqSystem);
+  unsigned short iRKLimit = config->GetnRKStep();
 
-      /*--- Send-Receive boundary conditions, and postprocessing ---*/
+  // standard multigrid: pre-smoothing
+  // start with solution on fine grid h
+  // apply nonlinear smoother (e.g. Gauss-Seidel) to system to damp high-frequency errors.
+  PreSmoothing(RunTime_EqSystem, geometry, solver_container, config_container, solver_fine, numerics_fine, geometry_fine, solver_container_fine, config, iMesh, iZone, iRKLimit, lastPreSmoothIters);
 
-      solver_fine->Postprocessing(geometry_fine, solver_container_fine, config, iMesh);
-
-    }
-
-  }
 
   /*--- Compute Forcing Term $P_(k+1) = I^(k+1)_k(P_k+F_k(u_k))-F_(k+1)(I^(k+1)_k u_k)$ and update solution for multigrid ---*/
 
   if ( iMesh < config->GetnMGLevels() ) {
+    //cout << "imesh =" << iMesh << " " << config->GetnMGLevels() << endl;
 
     /*--- Shorter names to refer to coarse grid entities. ---*/
 
@@ -239,7 +203,7 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry,
     SetResidual_Term(geometry_fine, solver_fine);
 
     /*--- Compute $r_(k+1) = F_(k+1)(I^(k+1)_k u_k)$ ---*/
-
+    // restrict the fine-grid solution to the coarser grid. This must be FAS multigrid, because else we restrict the residual.
     SetRestricted_Solution(RunTime_EqSystem, solver_fine, solver_coarse, geometry_fine, geometry_coarse, config);
 
     solver_coarse->Preprocessing(geometry_coarse, solver_container_coarse, config, iMesh+1, NO_RK_ITER, RunTime_EqSystem, false);
@@ -248,7 +212,10 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry,
 
     /*--- Compute $P_(k+1) = I^(k+1)_k(r_k) - r_(k+1) ---*/
 
-    SetForcing_Term(solver_fine, solver_coarse, geometry_fine, geometry_coarse, config, iMesh+1);
+    // Adapt damping based on recorded pre-smoothing iterations and apply to forcing term
+    //cout << "calling damp_restric_Adapt" << endl;
+    su2double adapted_factor = Damp_Restric_Adapt(lastPreSmoothIters, config->GetnMGLevels() + 1, config);
+    SetForcing_Term(solver_fine, solver_coarse, geometry_fine, geometry_coarse, config, iMesh+1, adapted_factor);
 
     /*--- Restore the time integration settings. ---*/
 
@@ -265,46 +232,151 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry,
         nextRecurseParam = 0;
 
       MultiGrid_Cycle(geometry, solver_container, numerics_container, config_container,
-                      iMesh+1, nextRecurseParam, RunTime_EqSystem, iZone, iInst);
+              iMesh+1, nextRecurseParam, RunTime_EqSystem, iZone, iInst, lastPreSmoothIters);
     }
 
     /*--- Compute prolongated solution, and smooth the correction $u^(new)_k = u_k +  Smooth(I^k_(k+1)(u_(k+1)-I^(k+1)_k u_k))$ ---*/
 
     GetProlongated_Correction(RunTime_EqSystem, solver_fine, solver_coarse, geometry_fine, geometry_coarse, config);
-
-    SmoothProlongated_Correction(RunTime_EqSystem, solver_fine, geometry_fine, config->GetMG_CorrecSmooth(iMesh), 1.25, config);
-
+    SmoothProlongated_Correction(RunTime_EqSystem, solver_fine, geometry_fine, config->GetMG_CorrecSmooth(iMesh), config->GetMG_Smooth_Coeff(), config, iMesh);
     SetProlongated_Correction(solver_fine, geometry_fine, config, iMesh);
 
 
     /*--- Solution post-smoothing in the prolongated grid. ---*/
+    PostSmoothing(RunTime_EqSystem, solver_fine, numerics_fine, geometry_fine, solver_container_fine, config, iMesh, iRKLimit);
 
-    for (unsigned short iPostSmooth = 0; iPostSmooth < config->GetMG_PostSmooth(iMesh); iPostSmooth++) {
+  }
 
-      for (unsigned short iRKStep = 0; iRKStep < iRKLimit; iRKStep++) {
+}
 
-        solver_fine->Preprocessing(geometry_fine, solver_container_fine, config, iMesh, iRKStep, RunTime_EqSystem, false);
 
-        if (iRKStep == 0) {
-          solver_fine->Set_OldSolution();
+void CMultiGridIntegration::PreSmoothing(unsigned short RunTime_EqSystem,
+CGeometry**** geometry,
+CSolver***** solver_container,
+CConfig **config_container,
+CSolver* solver_fine,
+CNumerics** numerics_fine,
+CGeometry* geometry_fine,
+CSolver** solver_container_fine,
+CConfig *config,
+unsigned short iMesh,
+unsigned short iZone,
+unsigned short iRKLimit,
+unsigned short *lastPreSmoothIters) {
+  const bool classical_rk4 = (config->GetKind_TimeIntScheme() == CLASSICAL_RK4_EXPLICIT);
+  const unsigned short nPreSmooth = config->GetMG_PreSmooth(iMesh);
+  const unsigned long timeIter = config->GetTimeIter();
+
+  // Early exit settings from config
+  const bool early_exit_enabled = config->GetMG_Smooth_EarlyExit();
+  const su2double early_exit_threshold = config->GetMG_Smooth_Res_Threshold();
+  const bool output_enabled = config->GetMG_Smooth_Output();
+
+  su2double initial_rms = 0.0;
+  if (early_exit_enabled || output_enabled) {
+    initial_rms = ComputeLinSysResRMS(solver_fine, geometry_fine);
+    if (output_enabled) {
+      cout << "MG Pre-Smoothing Level " << iMesh << " Initial RMS: " << initial_rms << endl;
+    }
+  }
 
-          if (classical_rk4) solver_fine->Set_NewSolution();
+  /*--- Do a presmoothing on the grid iMesh to be restricted to the grid iMesh+1 ---*/
+  unsigned short actual_iterations = nPreSmooth;
+  for (unsigned short iPreSmooth = 0; iPreSmooth < nPreSmooth; iPreSmooth++) {
+    /*--- Time and space integration ---*/
+    for (unsigned short iRKStep = 0; iRKStep < iRKLimit; iRKStep++) {
+      /*--- Send-Receive boundary conditions, and preprocessing ---*/
+      solver_fine->Preprocessing(geometry_fine, solver_container_fine, config, iMesh, iRKStep, RunTime_EqSystem, false);
+      if (iRKStep == 0) {
+        /*--- Set the old solution ---*/
+        solver_fine->Set_OldSolution();
+        if (classical_rk4) solver_fine->Set_NewSolution();
+        solver_fine->SetTime_Step(geometry_fine, solver_container_fine, config, iMesh, timeIter);
+        Adjoint_Setup(geometry, solver_container, config_container, RunTime_EqSystem, timeIter, iZone);
+      }
+      /*--- Space integration ---*/
+      Space_Integration(geometry_fine, solver_container_fine, numerics_fine, config, iMesh, iRKStep, RunTime_EqSystem);
+      /*--- Time integration, update solution using the old solution plus the solution increment ---*/
+      Time_Integration(geometry_fine, solver_container_fine, config, iRKStep, RunTime_EqSystem);
+      /*--- Send-Receive boundary conditions, and postprocessing ---*/
+      solver_fine->Postprocessing(geometry_fine, solver_container_fine, config, iMesh);
+    }
 
-          solver_fine->SetTime_Step(geometry_fine, solver_container_fine, config, iMesh,  config->GetTimeIter());
+    // Early exit check and output
+    if (early_exit_enabled || output_enabled) {
+      su2double current_rms = ComputeLinSysResRMS(solver_fine, geometry_fine);
+      if (output_enabled) {
+        cout << "MG Pre-Smoothing Level " << iMesh << " Iteration " << iPreSmooth + 1 << "/" << nPreSmooth << " RMS: " << current_rms << endl;
+      }
+      if (early_exit_enabled && current_rms < early_exit_threshold * initial_rms) {
+        if (output_enabled) {
+          cout << "MG Pre-Smoothing Level " << iMesh << " Early exit at iteration " << iPreSmooth + 1 << endl;
         }
+        actual_iterations = iPreSmooth + 1;
+        break;
+      }
+    }
+  }
 
-        Space_Integration(geometry_fine, solver_container_fine, numerics_fine, config, iMesh, iRKStep, RunTime_EqSystem);
+  // Record actual iterations performed for this MG level
+  if (lastPreSmoothIters != nullptr) lastPreSmoothIters[iMesh] = actual_iterations;
+}
 
-        Time_Integration(geometry_fine, solver_container_fine, config, iRKStep, RunTime_EqSystem);
 
-        solver_fine->Postprocessing(geometry_fine, solver_container_fine, config, iMesh);
+void CMultiGridIntegration::PostSmoothing(unsigned short RunTime_EqSystem, CSolver* solver_fine,CNumerics** numerics_fine, CGeometry* geometry_fine, CSolver** solver_container_fine, CConfig *config, unsigned short iMesh,unsigned short iRKLimit)
+{
+  const bool classical_rk4 = (config->GetKind_TimeIntScheme() == CLASSICAL_RK4_EXPLICIT);
+  const unsigned short nPostSmooth = config->GetMG_PostSmooth(iMesh);
+  const unsigned long timeIter = config->GetTimeIter();
+
+  // Early exit settings from config
+  const bool early_exit_enabled = config->GetMG_Smooth_EarlyExit();
+  const su2double early_exit_threshold = config->GetMG_Smooth_Res_Threshold();
+  const bool output_enabled = config->GetMG_Smooth_Output();
+
+  su2double initial_rms = 0.0;
+  if (early_exit_enabled || output_enabled) {
+    initial_rms = ComputeLinSysResRMS(solver_fine, geometry_fine);
+    if (output_enabled) {
+      cout << "MG Post-Smoothing Level " << iMesh << " Initial RMS: " << initial_rms << endl;
+    }
+  }
 
+  /*--- Do a postsmoothing on the grid iMesh after prolongation from the grid iMesh+1 ---*/
+  for (unsigned short iPostSmooth = 0; iPostSmooth < nPostSmooth; iPostSmooth++) {
+    for (unsigned short iRKStep = 0; iRKStep < iRKLimit; iRKStep++) {
+      solver_fine->Preprocessing(geometry_fine, solver_container_fine, config, iMesh, iRKStep, RunTime_EqSystem, false);
+      if (iRKStep == 0) {
+        /*--- Set the old solution ---*/
+        solver_fine->Set_OldSolution();
+        if (classical_rk4) solver_fine->Set_NewSolution();
+        solver_fine->SetTime_Step(geometry_fine, solver_container_fine, config, iMesh,  timeIter);
       }
+      /*--- Space integration ---*/
+      Space_Integration(geometry_fine, solver_container_fine, numerics_fine, config, iMesh, iRKStep, RunTime_EqSystem);
+      /*--- Time integration, update solution using the old solution plus the solution increment ---*/
+      Time_Integration(geometry_fine, solver_container_fine, config, iRKStep, RunTime_EqSystem);
+      /*--- Send-Receive boundary conditions, and postprocessing ---*/
+      solver_fine->Postprocessing(geometry_fine, solver_container_fine, config, iMesh);
     }
-  }
 
+    // Early exit check and output
+    if (early_exit_enabled || output_enabled) {
+      su2double current_rms = ComputeLinSysResRMS(solver_fine, geometry_fine);
+      if (output_enabled) {
+        cout << "MG Post-Smoothing Level " << iMesh << " Iteration " << iPostSmooth + 1 << "/" << nPostSmooth << " RMS: " << current_rms << endl;
+      }
+      if (early_exit_enabled && current_rms < early_exit_threshold * initial_rms) {
+        if (output_enabled) {
+          cout << "MG Post-Smoothing Level " << iMesh << " Early exit at iteration " << iPostSmooth + 1 << endl;
+        }
+        break;
+      }
+    }
+  }
 }
 
+
 void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqSystem, CSolver *sol_fine, CSolver *sol_coarse,
                                                       CGeometry *geo_fine, CGeometry *geo_coarse, CConfig *config) {
   unsigned long Point_Fine, Point_Coarse, iVertex;
@@ -353,8 +425,8 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
 
         Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode();
 
-        /*--- For dirichlet boundary condtions, set the correction to zero.
-         Note that Solution_Old stores the correction not the actual value ---*/
+        /*--- For dirichlet boundary conditions, set the correction to zero.
+         Note that Solution_Old stores the correction, not the actual value ---*/
 
         su2double zero[3] = {0.0};
         sol_coarse->GetNodes()->SetVelocity_Old(Point_Coarse, zero);
@@ -381,7 +453,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
 }
 
 void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_EqSystem, CSolver *solver, CGeometry *geometry,
-                                                         unsigned short val_nSmooth, su2double val_smooth_coeff, CConfig *config) {
+                                                         unsigned short val_nSmooth, su2double val_smooth_coeff, CConfig *config, unsigned short iMesh) {
 
   /*--- Check if there is work to do. ---*/
   if (val_nSmooth == 0) return;
@@ -399,7 +471,19 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
   }
   END_SU2_OMP_FOR
 
-  /*--- Jacobi iterations. ---*/
+  /*--- Compute initial RMS for adaptive smoothing ---*/
+  su2double initial_rms = 0.0;
+  if (config->GetMG_Smooth_EarlyExit()) {
+    initial_rms = ComputeLinSysResRMS(solver, geometry);
+  }
+
+  /*--- Output initial RMS if enabled ---*/
+  if (config->GetMG_Smooth_Output()) {
+    cout << "MG Correction-Smoothing Level " << iMesh << " Initial RMS: " << initial_rms << endl;
+  }
+
+  /*--- Jacobi iterations with adaptive early exit ---*/
+  unsigned short actual_iterations = val_nSmooth;
 
   for (iSmooth = 0; iSmooth < val_nSmooth; iSmooth++) {
 
@@ -451,6 +535,35 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
       }
     }
 
+    /*--- Output RMS residual if enabled ---*/
+    if (config->GetMG_Smooth_Output()) {
+      const su2double RMS_Res = ComputeLinSysResRMS(solver, geometry);
+      cout << "MG Correction-Smoothing Level " << iMesh << " Iteration " << iSmooth+1 << "/" << val_nSmooth << " RMS: " << RMS_Res << endl;
+    }
+
+    /*--- Adaptive early exit check after first iteration ---*/
+    if (config->GetMG_Smooth_EarlyExit() && iSmooth == 0 && val_nSmooth > 1) {
+      su2double current_rms = ComputeLinSysResRMS(solver, geometry);
+      su2double reduction_ratio = current_rms / initial_rms;
+
+      // If RMS reduction is sufficient (ratio <= threshold), additional iterations may not be necessary
+      if (reduction_ratio <= config->GetMG_Smooth_Res_Threshold()) {
+        if (config->GetMG_Smooth_Output()) {
+          cout << "MG Correction-Smoothing Level " << iMesh << " Early exit: sufficient RMS reduction ("
+               << reduction_ratio << " <= " << config->GetMG_Smooth_Res_Threshold() << ")" << endl;
+        }
+        actual_iterations = 1;  // Only do this one iteration
+        break;
+      }
+      // If reduction is insufficient (ratio > threshold), continue with remaining iterations
+    }
+
+  }
+
+  /*--- Log if iterations were skipped ---*/
+  if (config->GetMG_Smooth_Output() && actual_iterations < val_nSmooth) {
+    cout << "MG Correction-Smoothing Level " << iMesh << " completed " << actual_iterations
+         << "/" << val_nSmooth << " iterations (adaptive early exit)" << endl;
   }
 
 }
@@ -500,7 +613,8 @@ void CMultiGridIntegration::SetProlongated_Solution(unsigned short RunTime_EqSys
 }
 
 void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coarse, CGeometry *geo_fine,
-                                            CGeometry *geo_coarse, CConfig *config, unsigned short iMesh) {
+                                            CGeometry *geo_coarse, CConfig *config, unsigned short iMesh,
+                                            su2double val_factor) {
 
   unsigned long Point_Fine, Point_Coarse, iVertex;
   unsigned short iMarker, iVar, iChildren;
@@ -508,6 +622,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
 
   const unsigned short nVar = sol_coarse->GetnVar();
   su2double factor = config->GetDamp_Res_Restric();
+  if (val_factor > 0.0) factor = val_factor;
 
   auto *Residual = new su2double[nVar];
 
@@ -530,6 +645,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
 
   delete [] Residual;
 
+  /*--- Zero momentum components of truncation error on viscous walls ---*/
   for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
     if (config->GetViscous_Wall(iMarker)) {
       SU2_OMP_FOR_STAT(32)
@@ -549,6 +665,121 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
 
 }
 
+/*
+ * Damp_Restric_Adapt
+ * ------------------
+ * Compute an adapted restriction damping factor (Damp_Res_Restric) based on the
+ * number of pre-smoothing iterations actually performed on each multigrid level.
+ *
+ * Arguments:
+ *  - lastPreSmoothIters: array of length at least (nMGLevels+1) containing the
+ *    recorded number of pre-smoothing iterations performed at each MG level.
+ *    The convention used here is that index == level (0..nMGLevels).
+ *  - lastPreSmoothCount: length of the array passed in (for safety checks).
+ *  - config: the CConfig pointer used to read current settings (no write performed).
+ *
+ * Returns:
+ *  - the adapted damping factor (may be equal to current factor if no change).
+ *
+ * Notes:
+ *  - This routine performs MPI Allreduce operations to make the adapt decision
+ *    globally consistent across ranks.
+ *  - The function intentionally does NOT write back into CConfig; it returns the
+ *    new factor so the caller can decide how to apply it (e.g., via a setter
+ *    once available or by passing it into the next call).
+ */
+static su2double Damp_Restric_Adapt(const unsigned short *lastPreSmoothIters,
+                                   unsigned short lastPreSmoothCount,
+                                   CConfig *config) {
+  //cout << "starting Damp_Restric_Adapt" << endl;
+  if (config == nullptr) return 0.0;
+
+  const unsigned short nMGLevels = config->GetnMGLevels();
+
+  // Safety: require the provided array to have at least nMGLevels+1 entries
+  if (lastPreSmoothIters == nullptr || lastPreSmoothCount < (nMGLevels + 1)) {
+    // Not enough information to adapt; return current factor unchanged
+    return config->GetDamp_Res_Restric();
+  }
+
+  int local_any_full = 0; // true if any inspected coarse level reached configured max
+  int local_all_one = 1;  // true if all inspected coarse levels performed exactly 1 iter
+  int local_inspected = 0; // number of coarse levels that have recorded a pre-smooth value
+
+  // Inspect all coarse-grid levels (levels 1..nMGLevels). Ignore entries
+  // that are still zero (not yet visited during this MG run).
+  for (unsigned short lvl = 1; lvl <= nMGLevels; ++lvl) {
+    const unsigned short performed = lastPreSmoothIters[lvl];
+    const unsigned short configured = config->GetMG_PreSmooth(lvl);
+    //cout << "  Level " << lvl << ": performed=" << performed
+    //     << " configured=" << configured << endl;
+    if (performed == 0) continue; // skip un-inspected level
+    ++local_inspected;
+    if (performed >= configured) local_any_full = 1;
+    if (performed != 1) local_all_one = 0;
+  }
+  //cout << "local_inspected=" << local_inspected
+  //     << " local_any_full=" << local_any_full
+  //     << " local_all_one=" << local_all_one << endl;
+
+
+    // Debug output: local decision stats
+    //cout << "Damp_Restric_Adapt local: inspected=" << local_inspected
+    //  << " any_full=" << local_any_full << " all_one=" << local_all_one << endl;
+
+    // Make decision globally consistent across MPI ranks
+  int global_any_full = 0;
+  int global_all_one = 0;
+  int global_inspected = 0;
+
+  // Sum inspected counts across ranks; if nobody inspected any levels, skip adaptation
+  SU2_MPI::Allreduce(&local_inspected, &global_inspected, 1, MPI_INT, MPI_SUM, SU2_MPI::GetComm());
+
+  if (global_inspected == 0) {
+    // No ranks have inspected coarse levels yet — do not adapt
+    return config->GetDamp_Res_Restric();
+  }
+
+    SU2_MPI::Allreduce(&local_any_full, &global_any_full, 1, MPI_INT, MPI_MAX, SU2_MPI::GetComm());
+    SU2_MPI::Allreduce(&local_all_one, &global_all_one, 1, MPI_INT, MPI_MIN, SU2_MPI::GetComm());
+
+
+  const su2double current = config->GetDamp_Res_Restric();
+  //cout << "Current Damp_Res_Restric: " << current << endl;
+  su2double new_factor = current;
+
+  const su2double scale_down = 0.99;
+  const su2double scale_up = 1.01;
+  const su2double clamp_min = 0.1;
+  // larger than this is bad for stability
+  const su2double clamp_max = 0.95;
+
+  if (global_any_full) {
+    new_factor = current * scale_down;
+  } else if (global_all_one) {
+    new_factor = current * scale_up;
+  } else {
+    // no change
+    return current;
+  }
+
+  // Clamp result
+  new_factor = std::min(std::max(new_factor, clamp_min), clamp_max);
+
+  // Optionally log the change on all ranks (config controls verbosity)
+  if (config->GetMG_Smooth_Output()) {
+    cout << "Adaptive Damp_Res_Restric: " << current << " -> " << new_factor << endl;
+  }
+  // Persist the adapted factor into the runtime config so subsequent cycles
+  // observe the updated value. This is safe because the adapt decision was
+  // already made through global MPI reductions, so calling the setter on all
+  // ranks yields the same value everywhere.
+  config->SetDamp_Res_Restric(new_factor);
+  //cout << "ending Damp_Restric_Adapt, damping = " <<config->GetDamp_Res_Restric()  << endl;
+  return new_factor;
+}
+
+
 void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solver) {
 
   AD::StartNoSharedReading();
@@ -590,6 +821,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst
             sol_coarse->GetNodes()->SetVelSolutionVector(Point_Coarse, Grid_Vel);
           }
           else {
+            // nijso asks: why only for the velocity?
             /*--- For stationary no-slip walls, set the velocity to zero. ---*/
             su2double zero[3] = {0.0};
             sol_coarse->GetNodes()->SetVelSolutionVector(Point_Coarse, zero);
diff --git a/SU2_CFD/src/integration/ComputeLinSysResRMS.hpp b/SU2_CFD/src/integration/ComputeLinSysResRMS.hpp
new file mode 100644
index 000000000000..f69698146207
--- /dev/null
+++ b/SU2_CFD/src/integration/ComputeLinSysResRMS.hpp
@@ -0,0 +1,26 @@
+#pragma once
+#include "../../../Common/include/geometry/CGeometry.hpp"
+#include "../../include/solvers/CSolver.hpp"
+#include <vector>
+#include <cmath>
+#include "../../../Common/include/CConfig.hpp"
+
+inline su2double ComputeLinSysResRMS(const CSolver* solver, const CGeometry* geometry) {
+    unsigned short nVar = solver->GetnVar();
+    unsigned long nPointDomain = geometry->GetnPointDomain();
+    std::vector<su2double> sumRes(nVar, 0.0);
+    su2double localSum = 0.0;
+    for (unsigned long i = 0; i < nPointDomain; ++i) {
+        const su2double* res = solver->LinSysRes.GetBlock(i);
+        for (unsigned short v = 0; v < nVar; ++v) {
+            sumRes[v] += res[v] * res[v];
+        }
+    }
+    for (unsigned short v = 0; v < nVar; ++v) localSum += sumRes[v];
+    su2double globalSum = 0.0;
+    SU2_MPI::Allreduce(&localSum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
+    unsigned long globalNPointDomain = 0;
+    SU2_MPI::Allreduce(&nPointDomain, &globalNPointDomain, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
+    if (globalNPointDomain == 0) return 0.0;
+    return std::sqrt(globalSum / (globalNPointDomain * nVar));
+}
diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp
index 383c55945ec8..c8c7c34a3f6e 100644
--- a/SU2_CFD/src/solvers/CIncNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp
@@ -739,6 +739,7 @@ void CIncNSSolver::SetTau_Wall_WF(CGeometry *geometry, CSolver **solver_containe
 
       const auto iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
       const auto Point_Normal = geometry->vertex[iMarker][iVertex]->GetNormal_Neighbor();
+
       /*--- On the finest mesh compute also on halo nodes to avoid communication of tau wall. ---*/
       if ((!geometry->nodes->GetDomain(iPoint)) && !(MGLevel==MESH_0)) continue;
 
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index 99be4b04cc40..08cf348a1d89 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -30,6 +30,7 @@
 #include "../../include/gradients/computeGradientsGreenGauss.hpp"
 #include "../../include/gradients/computeGradientsLeastSquares.hpp"
 #include "../../include/limiters/computeLimiters.hpp"
+#include <deque>
 #include "../../../Common/include/toolboxes/MMS/CIncTGVSolution.hpp"
 #include "../../../Common/include/toolboxes/MMS/CInviscidVortexSolution.hpp"
 #include "../../../Common/include/toolboxes/MMS/CMMSIncEulerSolution.hpp"
@@ -94,6 +95,15 @@ CSolver::CSolver(LINEAR_SOLVER_MODE linear_solver_mode) : System(linear_solver_m
 
   IterLinSolver = 0;
 
+  /*--- Initialize Adaptive CFL state ---*/
+  consecutiveReduceIters = 0;
+  consecutiveIncreaseIters = 0;
+  oscillationCounter = 0;
+  previousOscillation = false;
+  NonLinRes_Counter = 0;
+  Old_Func = 0.0;
+  New_Func = 0.0;
+
   /*--- Initialize pointer for any verification solution. ---*/
   VerificationSolution  = nullptr;
 
@@ -1701,253 +1711,856 @@ void CSolver::ResetCFLAdapt() {
   Old_Func = 0;
   New_Func = 0;
   NonLinRes_Counter = 0;
+  consecutiveReduceIters = 0;
+  consecutiveIncreaseIters = 0;
+  oscillationCounter = 0;
+  previousOscillation = false;
+  peaks.clear();
+  valleys.clear();
 }
 
 
-void CSolver::AdaptCFLNumber(CGeometry **geometry,
-                             CSolver   ***solver_container,
-                             CConfig   *config) {
 
-  /* Adapt the CFL number on all multigrid levels using an
-   exponential progression with under-relaxation approach. */
+su2double CSolver::ComputeAdjustedCFL(su2double currentCFL,
+                                       su2double underRelaxation,
+                                       bool reduceCFL,
+                                       bool resetCFL,
+                                       bool canIncrease,
+                                       unsigned long iter,
+                                       su2double startingIter,
+                                       su2double CFLMin,
+                                       su2double CFLMax,
+                                       su2double CFLFactorDecrease,
+                                       su2double CFLFactorIncrease) {
+  /* If we detect a stalled nonlinear residual, force CFL to minimum */
+  if (resetCFL) {
+    return CFLMin;
+  }
+
+  /* Determine CFL adjustment factor based on under-relaxation and convergence flags.
+     Only apply under-relaxation based reduction after starting iteration. */
+  su2double CFLFactor = 1.0;
+  if ((iter >= startingIter && underRelaxation < 0.1) || reduceCFL) {
+    CFLFactor = CFLFactorDecrease;
+  } else if ((iter >= startingIter && underRelaxation >= 0.1 && underRelaxation < 1.0) || !canIncrease) {
+    CFLFactor = 1.0;
+  } else {
+    CFLFactor = CFLFactorIncrease;
+  }
+
+  /* Apply the adjustment factor */
+  su2double CFL = currentCFL * CFLFactor;
+
+  /* Clamp to min/max limits */
+  CFL = max(CFLMin, min(CFLMax, CFL));
 
-  vector<su2double> MGFactor(config->GetnMGLevels()+1,1.0);
+  return CFL;
+}
+
+void CSolver::ApplyCFLToAllPoints(CGeometry *geometry,
+                                   CSolver **solver_container,
+                                   CConfig *config,
+                                   unsigned short iMesh,
+                                   bool reduceCFL,
+                                   bool resetCFL,
+                                   bool canIncrease,
+                                   su2double startingIter) {
+  const bool fullComms = (config->GetComm_Level() == COMM_FULL);
+  const unsigned long iter = config->GetMultizone_Problem() ? config->GetOuterIter() : config->GetInnerIter();
+
+  CSolver *solverFlow = solver_container[FLOW_SOL];
+  CSolver *solverTurb = solver_container[TURB_SOL];
+  CSolver *solverSpecies = solver_container[SPECIES_SOL];
+
+  const su2double CFLMin = config->GetCFL_AdaptParam(2);
+  const su2double CFLMax = config->GetCFL_AdaptParam(3);
   const su2double CFLFactorDecrease = config->GetCFL_AdaptParam(0);
   const su2double CFLFactorIncrease = config->GetCFL_AdaptParam(1);
-  const su2double CFLMin            = config->GetCFL_AdaptParam(2);
-  const su2double CFLMax            = config->GetCFL_AdaptParam(3);
-  const su2double acceptableLinTol  = config->GetCFL_AdaptParam(4);
-  const su2double startingIter      = config->GetCFL_AdaptParam(5);
-  const bool fullComms              = (config->GetComm_Level() == COMM_FULL);
+  const su2double CFLTurbReduction = config->GetCFLRedCoeff_Turb();
+  const su2double CFLSpeciesReduction = config->GetCFLRedCoeff_Species();
 
-  /* Number of iterations considered to check for stagnation. */
-  const auto Res_Count = min(100ul, config->GetnInner_Iter()-1);
+  su2double myCFLMin = 1e30, myCFLMax = 0.0, myCFLSum = 0.0;
 
-  static bool reduceCFL, resetCFL, canIncrease;
+  SU2_OMP_MASTER
+  if (fullComms) {
+    Min_CFL_Local = 1e30;
+    Max_CFL_Local = 0.0;
+    Avg_CFL_Local = 0.0;
+  }
+  END_SU2_OMP_MASTER
 
-  for (unsigned short iMesh = 0; iMesh <= config->GetnMGLevels(); iMesh++) {
+  SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_max_threads()))
+  for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++) {
+    /* Get current local CFL and under-relaxation parameters */
+    su2double currentCFL = solverFlow->GetNodes()->GetLocalCFL(iPoint);
+    su2double underRelaxationFlow = solverFlow->GetNodes()->GetUnderRelaxation(iPoint);
+    su2double underRelaxationTurb = 1.0;
+    if (solverTurb)
+      underRelaxationTurb = solverTurb->GetNodes()->GetUnderRelaxation(iPoint);
+    const su2double underRelaxation = min(underRelaxationFlow, underRelaxationTurb);
+
+    /* Compute adjusted CFL using the dedicated function */
+    su2double CFL = ComputeAdjustedCFL(currentCFL, underRelaxation,
+                                        reduceCFL, resetCFL, canIncrease,
+                                        iter, startingIter,
+                                        CFLMin, CFLMax,
+                                        CFLFactorDecrease, CFLFactorIncrease);
+
+    /* Debug output for first point every 50 iterations or at iteration 1 */
+    if (iPoint == 0 && (config->GetInnerIter() % 50 == 0 || config->GetInnerIter() == 1) && rank == MASTER_NODE) {
+      cout << "Point " << iPoint << " Iter=" << iter << " startingIter=" << startingIter
+           << " - underRelax=" << underRelaxation
+           << " CFL_before=" << currentCFL << " CFL_after=" << CFL
+           << " reduceCFL=" << reduceCFL << " canIncrease=" << canIncrease << endl;
+    }
 
-    /* Store the mean flow, and turbulence solvers more clearly. */
+    /* Set the adjusted CFL for all solvers */
+    solverFlow->GetNodes()->SetLocalCFL(iPoint, CFL);
+    if (solverTurb) {
+      solverTurb->GetNodes()->SetLocalCFL(iPoint, CFL * CFLTurbReduction);
+    }
+    if (solverSpecies) {
+      solverSpecies->GetNodes()->SetLocalCFL(iPoint, CFL * CFLSpeciesReduction);
+    }
 
-    CSolver *solverFlow = solver_container[iMesh][FLOW_SOL];
-    CSolver *solverTurb = solver_container[iMesh][TURB_SOL];
-    CSolver *solverSpecies = solver_container[iMesh][SPECIES_SOL];
+    /* Store min/max/sum for reporting */
+    if (fullComms) {
+      myCFLMin = min(CFL, myCFLMin);
+      myCFLMax = max(CFL, myCFLMax);
+      myCFLSum += CFL;
+    }
+  }
+  END_SU2_OMP_FOR
 
-    /* Compute the reduction factor for CFLs on the coarse levels. */
+  /* Reduce the min/max/avg local CFL numbers */
+  if (fullComms) {
+    SU2_OMP_CRITICAL
+    { /* OpenMP reduction */
+      Min_CFL_Local = min(Min_CFL_Local, myCFLMin);
+      Max_CFL_Local = max(Max_CFL_Local, myCFLMax);
+      Avg_CFL_Local += myCFLSum;
+    }
+    END_SU2_OMP_CRITICAL
 
-    if (iMesh == MESH_0) {
-      MGFactor[iMesh] = 1.0;
-    } else {
-      const su2double CFLRatio = config->GetCFL(iMesh)/config->GetCFL(iMesh-1);
-      MGFactor[iMesh] = MGFactor[iMesh-1]*CFLRatio;
+    BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
+    { /* MPI reduction and config update */
+      PerformCFLReductions(geometry, config, iMesh);
+      if (rank == MASTER_NODE) {
+        cout << "imesh=" << iMesh << " Avg CFL=" << Avg_CFL_Local << endl;
+      }
     }
+    END_SU2_OMP_SAFE_GLOBAL_ACCESS
+  }
+}
 
-    /* Check whether we achieved the requested reduction in the linear
-     solver residual within the specified number of linear iterations. */
+void CSolver::PerformCFLReductions(CGeometry *geometry, CConfig *config, unsigned short iMesh) {
+  su2double myCFLMin = Min_CFL_Local, myCFLMax = Max_CFL_Local, myCFLSum = Avg_CFL_Local;
 
-    su2double linResTurb = 0.0;
-    su2double linResSpecies = 0.0;
-    if ((iMesh == MESH_0) && solverTurb) linResTurb = solverTurb->GetResLinSolver();
-    if ((iMesh == MESH_0) && solverSpecies) linResSpecies = solverSpecies->GetResLinSolver();
+  SU2_MPI::Allreduce(&myCFLMin, &Min_CFL_Local, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
+  SU2_MPI::Allreduce(&myCFLMax, &Max_CFL_Local, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
+  SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
 
-    /* Max linear residual between flow and turbulence/species transport. */
-    const su2double linRes = max(solverFlow->GetResLinSolver(), max(linResTurb, linResSpecies));
+  Avg_CFL_Local /= su2double(geometry->GetGlobal_nPointDomain());
 
-    /* Tolerance limited to an acceptable value. */
-    const su2double linTol = max(acceptableLinTol, config->GetLinear_Solver_Error());
+  config->SetCFL(iMesh, Avg_CFL_Local);
+}
 
-    /* Check that we are meeting our nonlinear residual reduction target
-     over time so that we do not get stuck in limit cycles, this is done
-     on the fine grid and applied to all others. */
+void CSolver::SynchronizeCFLFlags(bool &reduceCFL, bool &resetCFL, bool &canIncrease) {
+#ifdef HAVE_MPI
+  /* Synchronize CFL decision flags across all MPI ranks to ensure consistent global strategy.
+     This prevents different ranks from making conflicting CFL decisions which would cause:
+     - Inconsistent CFL values for corresponding points across domain decomposition
+     - Potential divergence when some ranks reduce CFL while others increase
+     - Multigrid issues when coarse levels don't respond to fine grid instabilities
+
+     Logic:
+     - reduceCFL: Use logical OR - if ANY rank needs to reduce, ALL should reduce (safety)
+     - resetCFL: Use logical OR - if ANY rank detects catastrophic divergence, ALL should reset
+     - canIncrease: Use logical AND - only increase if ALL ranks are stable (conservative) */
+
+  int reduceCFL_int = reduceCFL ? 1 : 0;
+  int resetCFL_int = resetCFL ? 1 : 0;
+  int canIncrease_int = canIncrease ? 1 : 0;
+
+  SU2_MPI::Allreduce(MPI_IN_PLACE, &reduceCFL_int, 1, MPI_INT, MPI_LOR, SU2_MPI::GetComm());
+  SU2_MPI::Allreduce(MPI_IN_PLACE, &resetCFL_int, 1, MPI_INT, MPI_LOR, SU2_MPI::GetComm());
+  SU2_MPI::Allreduce(MPI_IN_PLACE, &canIncrease_int, 1, MPI_INT, MPI_LAND, SU2_MPI::GetComm());
+
+  reduceCFL = (reduceCFL_int != 0);
+  resetCFL = (resetCFL_int != 0);
+  canIncrease = (canIncrease_int != 0);
+#endif
+  /* No synchronization needed for serial builds - flags remain as computed locally */
+}
 
-    BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
-    { /* Only the master thread updates the shared variables. */
+void CSolver::ApplyCFLToCoarseGrid(CGeometry *geometry, CSolver **solver_container,
+                                   CConfig *config, unsigned short iMesh,
+                                   bool reduceCFL, bool resetCFL, bool canIncrease,
+                                   su2double startingIter) {
+  const unsigned long iter = config->GetMultizone_Problem() ? config->GetOuterIter() : config->GetInnerIter();
+
+  /* Base strategy: use 1.5x multiplier from fine grid */
+  const su2double baseFactor = 1.5;
+  su2double CFL_base = baseFactor * config->GetCFL(iMesh - 1);
+
+  /* Apply same adaptation logic as fine grid to respond to stability issues.
+     This prevents coarse grids from being too aggressive when fine grid struggles. */
+  const su2double CFLMin = config->GetCFL_AdaptParam(2);
+  const su2double CFLMax = config->GetCFL_AdaptParam(3);
+  const su2double CFLFactorDecrease = config->GetCFL_AdaptParam(0);
+  const su2double CFLFactorIncrease = config->GetCFL_AdaptParam(1);
+
+  su2double CFL = CFL_base;
+
+  /* Apply fine grid stability flags to coarse grid */
+  if (resetCFL && iter >= startingIter) {
+    CFL = CFLMin;
+  } else if (reduceCFL && iter >= startingIter) {
+    CFL = max(CFLMin, CFL_base * CFLFactorDecrease);
+  } else if (canIncrease && iter >= startingIter) {
+    CFL = min(CFLMax, CFL_base * CFLFactorIncrease);
+  }
+
+  /* Clamp to bounds */
+  CFL = max(CFLMin, min(CFLMax, CFL));
+
+  config->SetCFL(iMesh, CFL);
+
+  CSolver *solverFlow = solver_container[FLOW_SOL];
+  CSolver *solverTurb = solver_container[TURB_SOL];
+  CSolver *solverSpecies = solver_container[SPECIES_SOL];
 
-    /* Check if we should decrease or if we can increase, the 20% is to avoid flip-flopping. */
-    resetCFL = linRes > 0.99;
-    unsigned long iter = config->GetMultizone_Problem() ? config->GetOuterIter() : config->GetInnerIter();
+  const su2double CFLTurbReduction = config->GetCFLRedCoeff_Turb();
+  const su2double CFLSpeciesReduction = config->GetCFLRedCoeff_Species();
 
-    /* only change CFL number when larger than starting iteration */
-    reduceCFL = (linRes > 1.2*linTol) && (iter >= startingIter);
+  SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_max_threads()))
+  for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++) {
+    solverFlow->GetNodes()->SetLocalCFL(iPoint, CFL);
+    if (solverTurb) solverTurb->GetNodes()->SetLocalCFL(iPoint, CFL * CFLTurbReduction);
+    if (solverSpecies) solverSpecies->GetNodes()->SetLocalCFL(iPoint, CFL * CFLSpeciesReduction);
+  }
+  END_SU2_OMP_FOR
+
+  if (rank == MASTER_NODE && iter % 50 == 0) {
+    cout << "Coarse Grid " << iMesh << " - CFL=" << CFL
+         << " (base=" << CFL_base << ", reduceCFL=" << reduceCFL
+         << ", resetCFL=" << resetCFL << ", canIncrease=" << canIncrease << ")" << endl;
+  }
+}
+
+bool CSolver::DetectFlipFlop(const CFLAdaptParams &params, CConfig *config) {
+  unsigned long signChanges = 0;
+  su2double totalChange = 0.0;
+  auto prev = NonLinRes_Series.front();
+
+  for (const auto& val : NonLinRes_Series) {
+    totalChange += val;
+    signChanges += (prev > 0) ^ (val > 0);
+    prev = val;
+  }
+
+  bool flipFlopDetected = (signChanges > params.Res_Count/4) && (totalChange > -0.5);
+  unsigned long iter = config->GetMultizone_Problem() ? config->GetOuterIter() : config->GetInnerIter();
+
+  if (rank == MASTER_NODE && iter % 50 == 0) {
+    cout << "FlipFlop Debug - Iter " << iter << ": signChanges=" << signChanges
+         << " threshold=" << params.Res_Count/4 << " totalChange=" << totalChange
+         << " flipFlopDetected=" << flipFlopDetected << endl;
+  }
+
+  if (totalChange > 2.0) return true;
 
-    canIncrease = (linRes < linTol) && (iter >= startingIter);
+  return flipFlopDetected;
+}
+
+void CSolver::DetectFastDivergence(const CFLAdaptParams &params, CConfig *config,
+                                    su2double New_Func, su2double Old_Func,
+                                    bool &reduceCFL, bool &resetCFL) {
+  const unsigned long currIter = config->GetInnerIter();
+
+  /* 1. SINGLE-ITERATION SPIKE DETECTION: Catch catastrophic jumps immediately */
+  su2double singleIterChange = New_Func - Old_Func;
+  bool spikeDetected = false;
+  bool catastrophicSpike = false;
+
+  if (singleIterChange > 0.15) {  /* 0.15 log10 jump in one iteration */
+    spikeDetected = true;
+    reduceCFL = true;
+    if (singleIterChange > 0.3) {  /* 0.3+ log units = catastrophic */
+      catastrophicSpike = true;
+      resetCFL = true;
+    }
+  }
+
+  /* 2. SHORT-WINDOW TREND DETECTION: Catch degradation over 10 iterations */
+  bool shortTermDivergence = false;
+  su2double recentChange = 0.0;
+  su2double recentSlope = 0.0;
+
+  if (currIter >= 10) {
+    const unsigned long SHORT_WINDOW = 10;
 
-    if ((iMesh == MESH_0) && (Res_Count > 0)) {
-      Old_Func = New_Func;
-      if (NonLinRes_Series.empty()) NonLinRes_Series.resize(Res_Count,0.0);
+    /* Sum the last 10 delta values */
+    for (unsigned long i = 0; i < SHORT_WINDOW; i++) {
+      unsigned long idx = (NonLinRes_Counter + params.Res_Count - 1 - i) % params.Res_Count;
+      recentChange += NonLinRes_Series[idx];
+    }
+
+    /* Average change per iteration over the short window */
+    recentSlope = recentChange / su2double(SHORT_WINDOW);
 
-      /* Sum the RMS residuals for all equations. */
+    /* Trigger if: residuals increasing (actual divergence) */
+    if (recentChange > 0.1) {  /* Net increase over 10 iterations */
+      shortTermDivergence = true;
+      reduceCFL = true;
+    }
+    /* Note: Removed stall detection here - it was too aggressive for late-stage convergence.
+       The stagnation guard in TrackResidualHistory handles this more robustly over a longer window. */
+  }
 
-      New_Func = 0.0;
-      unsigned short totalVars = 0;
-      for (unsigned short iVar = 0; iVar < solverFlow->GetnVar(); iVar++) {
-        New_Func += log10(solverFlow->GetRes_RMS(iVar));
+  /* Debug output for fast detection mechanisms */
+  if (rank == MASTER_NODE && currIter % 50 == 0) {
+    cout << "FastDivergence Debug - Iter " << currIter
+         << ": singleIterChange=" << singleIterChange
+         << " spikeDetected=" << spikeDetected
+         << " catastrophicSpike=" << catastrophicSpike
+         << " recentChange(10iter)=" << recentChange
+         << " recentSlope=" << recentSlope
+         << " shortTermDivergence=" << shortTermDivergence << endl;
+  }
+}
+
+CSolver::CFLAdaptParams CSolver::InitializeCFLAdaptParams(CConfig *config) {
+  CFLAdaptParams params;
+  params.CFLFactorDecrease = config->GetCFL_AdaptParam(0);
+  params.CFLFactorIncrease = config->GetCFL_AdaptParam(1);
+  params.CFLMin = config->GetCFL_AdaptParam(2);
+  params.CFLMax = config->GetCFL_AdaptParam(3);
+  params.acceptableLinTol = config->GetCFL_AdaptParam(4);
+  params.startingIter = config->GetCFL_AdaptParam(5);
+  params.Res_Count = min(100ul, config->GetnInner_Iter()-1);
+  params.stallWindow = min(100ul, params.Res_Count);
+
+  return params;
+}
+
+su2double CSolver::ComputeMaxLinearResidual(CSolver *solverFlow, CSolver *solverTurb, CSolver *solverSpecies) {
+  /* Compute the maximum linear residual across all active solvers. */
+
+  su2double linResTurb = 0.0;
+  su2double linResSpecies = 0.0;
+  if (solverTurb) linResTurb = solverTurb->GetResLinSolver();
+  if (solverSpecies) linResSpecies = solverSpecies->GetResLinSolver();
+
+  return max(solverFlow->GetResLinSolver(), max(linResTurb, linResSpecies));
+}
+
+void CSolver::DetermineLinearSolverBasedCFLFlags(const CFLAdaptParams &params, CConfig *config,
+                                                 su2double linRes, su2double linTol,
+                                                 bool &reduceCFL, bool &resetCFL, bool &canIncrease) {
+  /* Determine CFL adjustment flags based on linear solver performance with consecutive iteration damping. */
+
+  const unsigned long DAMPING_ITERS_REDUCE = 5;
+  const unsigned long DAMPING_ITERS_INCREASE = 10;
+  // static unsigned long consecutiveReduceIters = 0; // Removed static
+  // static unsigned long consecutiveIncreaseIters = 0; // Removed static
+
+  unsigned long iter = config->GetMultizone_Problem() ? config->GetOuterIter() : config->GetInnerIter();
+
+  /* Check if we should decrease or if we can increase, the 20% is to avoid flip-flopping. */
+  /* Only reset CFL after starting iteration to allow initial convergence analysis */
+  resetCFL = (linRes > 0.99) && (iter >= params.startingIter);
+
+  /* only change CFL number when larger than starting iteration */
+  bool shouldReduce = (linRes > 1.2*linTol) && (iter >= params.startingIter);
+  bool canIncrease_raw = (linRes < linTol) && (iter >= params.startingIter);
+
+  /* Apply consecutive iteration damping */
+  if (shouldReduce) {
+    consecutiveReduceIters++;
+    consecutiveIncreaseIters = 0;
+  } else if (canIncrease_raw) {
+    consecutiveIncreaseIters++;
+    consecutiveReduceIters = 0;
+  } else {
+    consecutiveReduceIters = 0;
+    consecutiveIncreaseIters = 0;
+  }
+
+  reduceCFL = (consecutiveReduceIters >= DAMPING_ITERS_REDUCE);
+  canIncrease = (consecutiveIncreaseIters >= DAMPING_ITERS_INCREASE);
+
+  if (rank == MASTER_NODE && iter % 50 == 0) {
+    cout << "CFL Debug - Iter " << iter << ": linRes=" << linRes << " linTol=" << linTol
+         << " reduceCFL=" << reduceCFL << " (consec=" << consecutiveReduceIters << "/" << DAMPING_ITERS_REDUCE << ")"
+         << " canIncrease=" << canIncrease << " (consec=" << consecutiveIncreaseIters << "/" << DAMPING_ITERS_INCREASE << ")"
+         << " resetCFL=" << resetCFL << endl;
+  }
+}
+
+void CSolver::TrackResidualHistory(const CFLAdaptParams &params, CConfig *config,
+                                   CSolver **solver_container, unsigned short iMesh,
+                                   su2double &New_Func, su2double &Old_Func,
+                                   bool &reduceCFL, bool &resetCFL) {
+  /* Track residual history and detect catastrophic divergence and flip-flop oscillations. */
+
+  Old_Func = New_Func;
+  if (NonLinRes_Series.empty()) NonLinRes_Series.resize(params.Res_Count, 0.0);
+
+  /* Get pointers to solvers on this mesh level */
+  CSolver *solverFlow = solver_container[FLOW_SOL];
+  CSolver *solverTurb = (iMesh == MESH_0) ? solver_container[TURB_SOL] : nullptr;
+  CSolver *solverSpecies = (iMesh == MESH_0) ? solver_container[SPECIES_SOL] : nullptr;
+
+  /* Compute aggregate residual metric. Handle communication level to ensure consistency across MPI ranks.
+     In COMM_FULL mode, residuals are already globally reduced, so we can use them directly.
+     In COMM_REDUCED mode, residuals are local only, so we need to synchronize across ranks. */
+  const bool useGlobalResiduals = (config->GetComm_Level() == COMM_FULL);
+  unsigned short totalVars = 0;
+
+  if (useGlobalResiduals) {
+    /* COMM_FULL: Residuals already globally averaged - use directly */
+    New_Func = 0.0;
+    for (unsigned short iVar = 0; iVar < solverFlow->GetnVar(); iVar++) {
+      New_Func += log10(solverFlow->GetRes_RMS(iVar));
+      ++totalVars;
+    }
+    if (solverTurb) {
+      for (unsigned short iVar = 0; iVar < solverTurb->GetnVar(); iVar++) {
+        New_Func += log10(solverTurb->GetRes_RMS(iVar));
         ++totalVars;
       }
-      if ((iMesh == MESH_0) && solverTurb) {
-        for (unsigned short iVar = 0; iVar < solverTurb->GetnVar(); iVar++) {
-          New_Func += log10(solverTurb->GetRes_RMS(iVar));
-          ++totalVars;
-        }
+    }
+    if (solverSpecies) {
+      for (unsigned short iVar = 0; iVar < solverSpecies->GetnVar(); iVar++) {
+        New_Func += log10(solverSpecies->GetRes_RMS(iVar));
+        ++totalVars;
       }
-      if ((iMesh == MESH_0) && solverSpecies) {
-        for (unsigned short iVar = 0; iVar < solverSpecies->GetnVar(); iVar++) {
-          New_Func += log10(solverSpecies->GetRes_RMS(iVar));
-          ++totalVars;
-        }
+    }
+    New_Func /= totalVars;
+  } else {
+    /* COMM_REDUCED: Residuals are local only - need MPI synchronization for consistent CFL decisions */
+    su2double New_Func_Local = 0.0;
+    for (unsigned short iVar = 0; iVar < solverFlow->GetnVar(); iVar++) {
+      New_Func_Local += log10(solverFlow->GetRes_RMS(iVar));
+      ++totalVars;
+    }
+    if (solverTurb) {
+      for (unsigned short iVar = 0; iVar < solverTurb->GetnVar(); iVar++) {
+        New_Func_Local += log10(solverTurb->GetRes_RMS(iVar));
+        ++totalVars;
       }
-      New_Func /= totalVars;
+    }
+    if (solverSpecies) {
+      for (unsigned short iVar = 0; iVar < solverSpecies->GetnVar(); iVar++) {
+        New_Func_Local += log10(solverSpecies->GetRes_RMS(iVar));
+        ++totalVars;
+      }
+    }
+    New_Func_Local /= totalVars;
 
-      /* Compute the difference in the nonlinear residuals between the
-       current and previous iterations, taking care with very low initial
-       residuals (due to initialization). */
+    /* Synchronize across MPI ranks to ensure all ranks make identical CFL decisions */
+    SU2_MPI::Allreduce(&New_Func_Local, &New_Func, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
+    New_Func /= su2double(size);
+  }
 
-      if ((config->GetInnerIter() == 1) && (New_Func - Old_Func > 10)) {
-        Old_Func = New_Func;
-      }
-      NonLinRes_Series[NonLinRes_Counter] = New_Func - Old_Func;
+  /* Compute the difference in the nonlinear residuals between the
+     current and previous iterations, taking care with very low initial
+     residuals (due to initialization). */
+  if ((config->GetInnerIter() == 1) && (New_Func - Old_Func > 10)) {
+    Old_Func = New_Func;
+  }
+  NonLinRes_Series[NonLinRes_Counter] = New_Func - Old_Func;
+
+  /* Increment the counter, if we hit the max size, then start over. */
+  NonLinRes_Counter++;
+  if (NonLinRes_Counter == params.Res_Count) NonLinRes_Counter = 0;
+
+  /* Fast flip-flop detection using XOR sign changes (from old code).
+     This catches rapid oscillations that the peak-valley logic might miss. */
+  if (config->GetInnerIter() >= params.Res_Count) {
+    bool flipFlopOrDivergence = DetectFlipFlop(params, config);
+    reduceCFL |= flipFlopOrDivergence;
+
+    /* Check for catastrophic divergence and reset if needed */
+    su2double totalChange = 0.0;
+    for (const auto& val : NonLinRes_Series) {
+      totalChange += val;
+    }
+    if (totalChange > 2.0) { // orders of magnitude divergence
+      resetCFL = true;
+      NonLinRes_Counter = 0;
+      for (auto& val : NonLinRes_Series) val = 0.0;
+    }
+  }
+}
 
-      /* Increment the counter, if we hit the max size, then start over. */
+void CSolver::DetectPeakValley(const CFLAdaptParams &params, CConfig *config,
+                               su2double New_Func, bool &reduceCFL, bool &resetCFL) {
+  /* Detect oscillations and divergence using peak-valley progression
+     plus slope-based guards. This works alongside the flip-flop check above.
+     - Maintain a history window (Res_Count up to 400 iters).
+     - Extract peaks/valleys with minimum separation to avoid noise.
+     - Require new peaks/valleys to progress downward; otherwise flag.
+     - Use least-squares slope on last 3 peaks/valleys to catch divergence.
+     - Forget extrema older than a lookback window (300 iters). */
+
+  const unsigned long EXT_WINDOW = 300;
+  const unsigned long MIN_SEPARATION = 30;    /* iters between extrema to avoid noise */
+  const su2double VALUE_TOL = 0.05;            /* log10 residual tolerance for peak/valley improvement */
+  const su2double SLOPE_FLAT = -1.0e-3;        /* slope >= this => stagnation */
+  const su2double SLOPE_UP   =  0.1;           /* slope > this => divergence (log10 per iter) */
+
+  /* Counter to avoid reacting to every single iteration */
+  // static unsigned long oscillationCounter = 0; // Removed static
+  // static bool previousOscillation = false; // Removed static
+
+  /* Reconstruct New_Func history from the delta buffer. */
+  vector<su2double> funcHistory;
+  funcHistory.reserve(params.Res_Count);
+  su2double curr = New_Func;
+  funcHistory.push_back(curr);
+  for (unsigned long i = 1; i < params.Res_Count; i++) {
+    unsigned long idx = (NonLinRes_Counter + params.Res_Count - i) % params.Res_Count;
+    curr -= NonLinRes_Series[idx];
+    funcHistory.push_back(curr);
+  }
+  reverse(funcHistory.begin(), funcHistory.end());
 
-      NonLinRes_Counter++;
-      if (NonLinRes_Counter == Res_Count) NonLinRes_Counter = 0;
+  /* Map history index to iteration number. */
+  const unsigned long histSize = funcHistory.size();
+  const unsigned long currIter = config->GetInnerIter();
+  const unsigned long startIter = currIter - histSize + 1;
 
-      /* Detect flip-flop convergence to reduce CFL and large increases
-       to reset to minimum value, in that case clear the history. */
+  /* Find peaks and valleys using deques for automatic size management. */
+  // static deque<pair<unsigned long, su2double>> peaks; // Removed static
+  // static deque<pair<unsigned long, su2double>> valleys; // Removed static
 
-      if (config->GetInnerIter() >= Res_Count) {
-        unsigned long signChanges = 0;
-        su2double totalChange = 0.0;
-        auto prev = NonLinRes_Series.front();
-        for (const auto& val : NonLinRes_Series) {
-          totalChange += val;
-          signChanges += (prev > 0) ^ (val > 0);
-          prev = val;
-        }
-        reduceCFL |= (signChanges > Res_Count/4) && (totalChange > -0.5);
+  /* Clear old extrema that are outside the lookback window */
+  while (!peaks.empty() && currIter > EXT_WINDOW && peaks.front().first + EXT_WINDOW < currIter) {
+    peaks.pop_front();
+  }
+  while (!valleys.empty() && currIter > EXT_WINDOW && valleys.front().first + EXT_WINDOW < currIter) {
+    valleys.pop_front();
+  }
 
-        if (totalChange > 2.0) { // orders of magnitude
-          resetCFL = true;
-          NonLinRes_Counter = 0;
-          for (auto& val : NonLinRes_Series) val = 0.0;
-        }
-      }
+  unsigned long lastExtIdx = 0;
+  for (unsigned long i = 1; i + 1 < histSize; i++) {
+    if (i - lastExtIdx < MIN_SEPARATION) continue;
+    su2double a = funcHistory[i-1];
+    su2double b = funcHistory[i];
+    su2double c = funcHistory[i+1];
+    unsigned long iter_i = startIter + i;
+    /* Drop extrema that fall outside lookback window. */
+    if (currIter > EXT_WINDOW && iter_i + EXT_WINDOW < currIter) continue;
+    if (b > a && b > c) {
+      peaks.push_back(make_pair(iter_i, b));
+      if (peaks.size() > 3) peaks.pop_front();
+      lastExtIdx = i;
+    } else if (b < a && b < c) {
+      valleys.push_back(make_pair(iter_i, b));
+      if (valleys.size() > 3) valleys.pop_front();
+      lastExtIdx = i;
     }
-    } /* End safe global access, now all threads update the CFL number. */
-    END_SU2_OMP_SAFE_GLOBAL_ACCESS
+  }
 
-    /* Loop over all points on this grid and apply CFL adaption. */
+  auto slopeLSQ = [](const deque<pair<unsigned long, su2double>>& pts) {
+    if (pts.size() < 2) return 0.0;
+
+    /* Compute least-squares linear regression slope:
+       slope = [n*Σ(xy) - Σ(x)*Σ(y)] / [n*Σ(x²) - (Σ(x))²] */
+    su2double n = su2double(pts.size());
+    su2double sumX = 0.0, sumY = 0.0, sumXY = 0.0, sumX2 = 0.0;
+
+    for (const auto& pt : pts) {
+      su2double x = su2double(pt.first);   // iteration number
+      su2double y = pt.second;              // log10 residual value
+      sumX += x;
+      sumY += y;
+      sumXY += x * y;
+      sumX2 += x * x;
+    }
 
-    su2double myCFLMin = 1e30, myCFLMax = 0.0, myCFLSum = 0.0;
-    const su2double CFLTurbReduction = config->GetCFLRedCoeff_Turb();
-    const su2double CFLSpeciesReduction = config->GetCFLRedCoeff_Species();
+    su2double denominator = n * sumX2 - sumX * sumX;
+    if (fabs(denominator) < 1e-12) return 0.0;  // Avoid division by zero
 
-    SU2_OMP_MASTER
-    if ((iMesh == MESH_0) && fullComms) {
-      Min_CFL_Local = 1e30;
-      Max_CFL_Local = 0.0;
-      Avg_CFL_Local = 0.0;
-    }
-    END_SU2_OMP_MASTER
+    su2double slope = (n * sumXY - sumX * sumY) / denominator;
+    return slope;
+  };
 
-    SU2_OMP_FOR_STAT(roundUpDiv(geometry[iMesh]->GetnPointDomain(),omp_get_max_threads()))
-    for (unsigned long iPoint = 0; iPoint < geometry[iMesh]->GetnPointDomain(); iPoint++) {
+  bool peaksStagnant = false, valleysStagnant = false;
+  if (peaks.size() >= 2) {
+    peaksStagnant = peaks.back().second >= peaks[peaks.size()-2].second - VALUE_TOL;
+  }
+  if (valleys.size() >= 2) {
+    valleysStagnant = valleys.back().second >= valleys[valleys.size()-2].second - VALUE_TOL;
+  }
 
-      /* Get the current local flow CFL number at this point. */
+  /* Slope of last 3 peaks/valleys (only if enough points). */
+  su2double slopePeaks = 0.0, slopeValleys = 0.0;
+  bool haveSlopePeaks = (peaks.size() >= 2);
+  bool haveSlopeValleys = (valleys.size() >= 2);
+  if (haveSlopePeaks && peaks.back().first > peaks.front().first) {
+    slopePeaks = slopeLSQ(peaks);
+  }
+  if (haveSlopeValleys && valleys.back().first > valleys.front().first) {
+    slopeValleys = slopeLSQ(valleys);
+  }
 
-      su2double CFL = solverFlow->GetNodes()->GetLocalCFL(iPoint);
+  /* If slope is exactly zero (extrema at same iteration), it's not meaningful - ignore it */
+  bool slopeFlat = false;
+  if (haveSlopePeaks && fabs(slopePeaks) > 1e-12 && slopePeaks >= SLOPE_FLAT) slopeFlat = true;
+  if (haveSlopeValleys && fabs(slopeValleys) > 1e-12 && slopeValleys >= SLOPE_FLAT) slopeFlat = true;
+
+  bool slopeUp = (haveSlopePeaks && slopePeaks > SLOPE_UP) || (haveSlopeValleys && slopeValleys > SLOPE_UP);
+
+  /* Check overall trend: compute average of ALL peaks and valleys to get robust trend.
+     In log10 space: convergence means residuals become more negative (decrease).
+     So we want: (avgNew - avgOld) < 0 for convergence. But we'll compute per-iteration
+     rate as (avgNew - avgOld)/dt, making NEGATIVE = convergence. */
+  su2double overallSlope = 0.0;
+  if (peaks.size() >= 2 && valleys.size() >= 2) {
+    /* Average all peaks and valleys separately */
+    su2double avgOldPeaks = 0.0, avgOldValleys = 0.0;
+    su2double avgNewPeaks = 0.0, avgNewValleys = 0.0;
+    unsigned long cntOld = 0, cntNew = 0;
+
+    /* Split extrema into old half and new half */
+    unsigned long midIter = (peaks.front().first + peaks.back().first) / 2;
+
+    for (const auto& p : peaks) {
+      if (p.first <= midIter) { avgOldPeaks += p.second; cntOld++; }
+      else { avgNewPeaks += p.second; cntNew++; }
+    }
+    for (const auto& v : valleys) {
+      if (v.first <= midIter) { avgOldValleys += v.second; cntOld++; }
+      else { avgNewValleys += v.second; cntNew++; }
+    }
 
-      /* Get the current under-relaxation parameters that were computed
-       during the previous nonlinear update. If we have a turbulence model,
-       take the minimum under-relaxation parameter between the mean flow
-       and turbulence systems. */
+    if (cntOld > 0 && cntNew > 0) {
+      su2double avgOld = (avgOldPeaks + avgOldValleys) / cntOld;
+      su2double avgNew = (avgNewPeaks + avgNewValleys) / cntNew;
+      /* Slope = (avgNew - avgOld) / dt: NEGATIVE = convergence (residuals decreasing) */
+      unsigned long dt = peaks.back().first - peaks.front().first;
+      if (dt > 0) overallSlope = (avgNew - avgOld) / su2double(dt);
+    }
+  }
 
-      su2double underRelaxationFlow = solverFlow->GetNodes()->GetUnderRelaxation(iPoint);
-      su2double underRelaxationTurb = 1.0;
-      if ((iMesh == MESH_0) && solverTurb)
-        underRelaxationTurb = solverTurb->GetNodes()->GetUnderRelaxation(iPoint);
-      const su2double underRelaxation = min(underRelaxationFlow,underRelaxationTurb);
+  /* Only act when we have at least two peaks and two valleys to avoid premature locking. */
+  bool haveData = (peaks.size() >= 2 && valleys.size() >= 2);
+  bool progressionFail = haveData && peaksStagnant && valleysStagnant;
 
-      /* If we apply a small under-relaxation parameter for stability,
-       then we should reduce the CFL before the next iteration. If we
-       are able to add the entire nonlinear update (under-relaxation = 1)
-       then we schedule an increase the CFL number for the next iteration. */
+  /* If all slopes are zero (extrema at same time or too close), don't treat as oscillation */
+  bool allSlopesZero = (fabs(slopePeaks) < 1e-12 && fabs(slopeValleys) < 1e-12 && fabs(overallSlope) < 1e-12);
 
-      su2double CFLFactor = 1.0;
-      if (underRelaxation < 0.1 || reduceCFL) {
-        CFLFactor = CFLFactorDecrease;
-      } else if ((underRelaxation >= 0.1 && underRelaxation < 1.0) || !canIncrease) {
-        CFLFactor = 1.0;
-      } else {
-        CFLFactor = CFLFactorIncrease;
-      }
+  /* Oscillation is only bad if slopes are problematic AND overall trend is not improving.
+     overallSlope = (avgNew - avgOld)/dt: NEGATIVE = convergence, POSITIVE = divergence */
+  bool oscillationDetected = false;
+  if (haveData && !allSlopesZero) {
+    oscillationDetected = (progressionFail || (slopeFlat && overallSlope > -0.001) || slopeUp);
+  }
 
-      /* Check if we are hitting the min or max and adjust. */
+  if (rank == MASTER_NODE && currIter % 50 == 0 && haveData) {
+    cout << "Oscillation Debug - Iter " << currIter << ": peaks=" << peaks.size() << " valleys=" << valleys.size()
+         << " slopePeaks=" << slopePeaks << " slopeValleys=" << slopeValleys
+         << " overallSlope=" << overallSlope << " (neg=converge, pos=diverge)"
+         << " slopeFlat=" << slopeFlat << " progressionFail=" << progressionFail
+         << " oscillationDetected=" << oscillationDetected << endl;
+  }
 
-      if (CFL*CFLFactor <= CFLMin) {
-        CFL       = CFLMin;
-        CFLFactor = MGFactor[iMesh];
-      } else if (CFL*CFLFactor >= CFLMax) {
-        CFL       = CFLMax;
-        CFLFactor = MGFactor[iMesh];
-      }
+  /* If overall trend shows strong convergence (negative slope), don't block CFL increase */
+  if (overallSlope < -0.001) {
+    oscillationDetected = false;
+  }
 
-      /* If we detect a stalled nonlinear residual, then force the CFL
-       for all points to the minimum temporarily to restart the ramp. */
+  /* Compute totalChange early to use for stronger override logic */
+  su2double totalChange = 0.0;
+  for (const auto& v : NonLinRes_Series) totalChange += v;
 
-      if (resetCFL) {
-        CFL       = CFLMin;
-        CFLFactor = MGFactor[iMesh];
-      }
+  /* STRONG OVERRIDE: If totalChange shows convergence over buffer, ignore oscillation detection.
+     This prevents false positives from noisy extrema when actual trend is good.
+     Relaxed from -0.1 to -0.01 to handle slow late-stage convergence. */
+  if (totalChange < -0.01) {  /* At least 0.01 log10 improvement over buffer */
+    oscillationDetected = false;
+  }
 
-      /* Apply the adjustment to the CFL and store local values. */
+  /* Also disable slopeUp detection if buffer shows net convergence - prevents false positives
+     from noise in individual valley slopes when overall trend is clearly downward */
+  if (totalChange < 0.0 && slopeUp) {
+    oscillationDetected = false;
+  }
 
-      CFL *= CFLFactor;
-      solverFlow->GetNodes()->SetLocalCFL(iPoint, CFL);
-      if ((iMesh == MESH_0) && solverTurb) {
-        solverTurb->GetNodes()->SetLocalCFL(iPoint, CFL * CFLTurbReduction);
-      }
-      if ((iMesh == MESH_0) && solverSpecies) {
-        solverSpecies->GetNodes()->SetLocalCFL(iPoint, CFL * CFLSpeciesReduction);
-      }
+  /* Damping: only reduce CFL if oscillation persists for multiple iterations */
+  if (oscillationDetected) {
+    if (previousOscillation) {
+      oscillationCounter++;
+    } else {
+      oscillationCounter = 1;
+    }
+  } else {
+    /* Decay counter aggressively when not detecting oscillation */
+    if (oscillationCounter > 0) {
+      oscillationCounter = max(0ul, oscillationCounter - 5);
+    }
+  }
+  previousOscillation = oscillationDetected;
+
+  /* Only act on oscillation if it persists for at least 3 checks */
+  bool persistentOscillation = (oscillationCounter >= 3);
+
+  /* EMERGENCY RESET: If counter is very high but we have clear convergence, reset completely.
+     This handles cases where stale detections accumulated and won't decay fast enough.
+     Use -0.01 threshold to match the main override logic. */
+  if (oscillationCounter > 50 && totalChange < -0.01) {
+    oscillationCounter = 0;
+    persistentOscillation = false;
+    if (rank == MASTER_NODE && currIter % 50 == 0) {
+      cout << "  Emergency reset: oscillation counter cleared due to strong convergence" << endl;
+    }
+  }
+  if (rank == MASTER_NODE && currIter % 50 == 0) {
+    cout << "Persistence Debug - Iter " << currIter << ": oscillationCounter=" << oscillationCounter
+         << " persistentOscillation=" << persistentOscillation << endl;
+  }
+  if (persistentOscillation) {
+    reduceCFL |= true;
+  }
 
-      /* Store min and max CFL for reporting on the fine grid. */
+  /* Strong divergence safety: if the total change over the full buffer is large positive, reset. */
+  if (rank == MASTER_NODE && currIter % 50 == 0) {
+    cout << "Divergence Debug - Iter " << currIter << ": totalChange=" << totalChange
+         << " slopeUp=" << slopeUp << endl;
+  }
+  if (totalChange > 2.0 || (slopeUp && totalChange > 0.0)) { // 2 orders worse OR slope issues with net divergence
+    resetCFL = true;
+    NonLinRes_Counter = 0;
+    for (auto& val : NonLinRes_Series) val = 0.0;
+  }
 
-      if ((iMesh == MESH_0) && fullComms) {
-        myCFLMin = min(CFL,myCFLMin);
-        myCFLMax = max(CFL,myCFLMax);
-        myCFLSum += CFL;
-      }
+  /* Stagnation guard: if over a recent window there is essentially no improvement,
+     lower CFL to escape the stall. Only trigger if we're truly stalled (near zero or
+     positive change) and not conflicting with other indicators showing good progress. */
+  const unsigned long STALL_WINDOW = min(100ul, params.Res_Count);
+  if (config->GetInnerIter() >= STALL_WINDOW) {
+    su2double stallChange = 0.0;
+    for (unsigned long i = 0; i < STALL_WINDOW; i++) {
+      unsigned long idx = (NonLinRes_Counter + params.Res_Count - 1 - i) % params.Res_Count;
+      stallChange += NonLinRes_Series[idx];
+    }
+    /* Only flag as stalled if change is very small or positive, AND we're not already
+       seeing good convergence indicators (canIncrease means linear solver is doing well).
+       Tightened threshold from -0.1 to -0.01 to avoid false positives on slow but steady convergence. */
+    const su2double STALL_TOL = -0.01; /* about 0.01 log10 improvement threshold over window */
+    bool isStalled = (stallChange > STALL_TOL);
+
+    /* Note: canIncrease not passed to this function, so we remove the canIncrease check from stagnation logic.
+       The stagnation guard will now trigger based solely on the stallChange metric. */
+    if (rank == MASTER_NODE && currIter % 50 == 0) {
+      cout << "Stagnation Debug - Iter " << currIter << ": stallChange=" << stallChange
+           << " STALL_TOL=" << STALL_TOL << " isStalled=" << isStalled
+           << " will_reduce=" << (isStalled || (stallChange > 0.0)) << endl;
+    }
 
+    if (isStalled || stallChange > 0.0) {
+      reduceCFL = true;
     }
-    END_SU2_OMP_FOR
+  }
+}
 
-    /* Reduce the min/max/avg local CFL numbers. */
+void CSolver::AdaptCFLNumber(CGeometry **geometry,
+                             CSolver   ***solver_container,
+                             CConfig   *config) {
 
-    if ((iMesh == MESH_0) && fullComms) {
-      SU2_OMP_CRITICAL
-      { /* OpenMP reduction. */
-        Min_CFL_Local = min(Min_CFL_Local,myCFLMin);
-        Max_CFL_Local = max(Max_CFL_Local,myCFLMax);
-        Avg_CFL_Local += myCFLSum;
-      }
-      END_SU2_OMP_CRITICAL
+  /* Adapt the CFL number on all multigrid levels using an
+   exponential progression with under-relaxation approach. */
+
+  const CFLAdaptParams params = InitializeCFLAdaptParams(config);
+  //const bool fullComms = (config->GetComm_Level() == COMM_FULL);
+
+  bool reduceCFL = false;
+  bool resetCFL = false;
+  bool canIncrease = false;
+
+  for (unsigned short iMesh = 0; iMesh <= config->GetnMGLevels(); iMesh++) {
+    if (iMesh == MESH_0) {
+
+      /* Store the mean flow, and turbulence solvers more clearly. */
+
+      CSolver *solverFlow = solver_container[iMesh][FLOW_SOL];
+      CSolver *solverTurb = solver_container[iMesh][TURB_SOL];
+      CSolver *solverSpecies = solver_container[iMesh][SPECIES_SOL];
+
+      /* Check whether we achieved the requested reduction in the linear
+         solver residual within the specified number of linear iterations. */
+      const su2double linRes = ComputeMaxLinearResidual(solverFlow, solverTurb, solverSpecies);
+
+      /* Tolerance limited to an acceptable value. */
+      const su2double linTol = max(params.acceptableLinTol, config->GetLinear_Solver_Error());
+
+      /* Check that we are meeting our nonlinear residual reduction target
+       over time so that we do not get stuck in limit cycles, this is done
+       on the fine grid and applied to all others. */
 
       BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
-      { /* MPI reduction. */
-        myCFLMin = Min_CFL_Local; myCFLMax = Max_CFL_Local; myCFLSum = Avg_CFL_Local;
-        SU2_MPI::Allreduce(&myCFLMin, &Min_CFL_Local, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
-        SU2_MPI::Allreduce(&myCFLMax, &Max_CFL_Local, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
-        SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-        Avg_CFL_Local /= su2double(geometry[iMesh]->GetGlobal_nPointDomain());
+      { /* Only the master thread updates the shared variables. */
+
+      unsigned long iter = config->GetMultizone_Problem() ? config->GetOuterIter() : config->GetInnerIter();
+
+      /* Determine CFL adjustment flags based on linear solver performance */
+      DetermineLinearSolverBasedCFLFlags(params, config, linRes, linTol, reduceCFL, resetCFL, canIncrease);
+
+     if (params.Res_Count > 0) {
+        /* Track residual history and detect flip-flop oscillations */
+        TrackResidualHistory(params, config, solver_container[iMesh], iMesh,
+                             New_Func, Old_Func, reduceCFL, resetCFL);
+
+        /* FAST DIVERGENCE DETECTION: Catch issues in 1-10 iterations before they become catastrophic.
+           These checks run before the longer-term oscillation detection below.
+           Only activate after startingIter to allow initial convergence analysis. */
+
+        if (iter >= params.startingIter) {
+          DetectFastDivergence(params, config, New_Func, Old_Func, reduceCFL, resetCFL);
+        }
+
+        /* Detect oscillations and divergence using peak-valley progression.
+           Only activate after startingIter to allow initial convergence analysis. */
+
+        if (iter >= params.startingIter && config->GetInnerIter() >= 3) {
+          DetectPeakValley(params, config, New_Func, reduceCFL, resetCFL);
+        }
       }
-      END_SU2_OMP_SAFE_GLOBAL_ACCESS
+
+    /* Synchronize CFL decision flags across all MPI ranks */
+    SynchronizeCFLFlags(reduceCFL, resetCFL, canIncrease);
+
+    if (rank == MASTER_NODE && iter % 50 == 0) {
+      cout << "MPI-Synchronized CFL Flags - Iter " << iter
+           << ": reduceCFL=" << reduceCFL
+           << " resetCFL=" << resetCFL
+           << " canIncrease=" << canIncrease << endl;
     }
 
-  }
+    } /* End safe global access, now all threads update the CFL number. */
+    END_SU2_OMP_SAFE_GLOBAL_ACCESS
 
+    /* Apply CFL adaptation to all points on this grid */
+    ApplyCFLToAllPoints(geometry[iMesh], solver_container[iMesh], config, iMesh,
+                        reduceCFL, resetCFL, canIncrease, params.startingIter);
+  } else {
+    /* For coarse grids, apply CFL respecting the fine grid stability flags.
+       This ensures that if fine grid detected divergence/oscillation, coarse grids
+       also reduce CFL instead of blindly using an aggressive multiplier. */
+    ApplyCFLToCoarseGrid(geometry[iMesh], solver_container[iMesh], config, iMesh,
+                         reduceCFL, resetCFL, canIncrease, params.startingIter);
+  }
+}
 }
 
+
+
+
+
+
 void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) {
 
   if (geometry->GetMGLevel() != MESH_0) return;
diff --git a/SU2_CFD/src/solvers/CSolverFactory.cpp b/SU2_CFD/src/solvers/CSolverFactory.cpp
index 2246431be24b..75fca2c856fc 100644
--- a/SU2_CFD/src/solvers/CSolverFactory.cpp
+++ b/SU2_CFD/src/solvers/CSolverFactory.cpp
@@ -310,6 +310,7 @@ CSolver* CSolverFactory::CreateSubSolver(SUB_SOLVER_TYPE kindSolver, CSolver **s
     case SUB_SOLVER_TYPE::TURB_SST:
       genericSolver = CreateTurbSolver(kindTurbModel, solver, geometry, config, iMGLevel, false);
       metaData.integrationType = INTEGRATION_TYPE::SINGLEGRID;
+      //metaData.integrationType = INTEGRATION_TYPE::MULTIGRID;
       break;
     case SUB_SOLVER_TYPE::TEMPLATE:
       genericSolver = new CTemplateSolver(geometry, config);
diff --git a/TestCases/wallfunctions/flatplate/compressible_SA/turb_SA_flatplate.cfg b/TestCases/wallfunctions/flatplate/compressible_SA/turb_SA_flatplate.cfg
index e44ebd97bdb4..20bfd1347bcf 100644
--- a/TestCases/wallfunctions/flatplate/compressible_SA/turb_SA_flatplate.cfg
+++ b/TestCases/wallfunctions/flatplate/compressible_SA/turb_SA_flatplate.cfg
@@ -100,5 +100,5 @@ MARKER_PLOTTING= ( wall )
 MARKER_MONITORING= ( wall )
 %
 OUTPUT_FILES= RESTART, PARAVIEW_MULTIBLOCK
-VOLUME_OUTPUT= RESIDUAL, PRIMITIVE, RESIDUAL
+VOLUME_OUTPUT= RESIDUAL, PRIMITIVE
 OUTPUT_WRT_FREQ= 1000
diff --git a/TestCases/wallfunctions/flatplate/compressible_SST/turb_SST_flatplate.cfg b/TestCases/wallfunctions/flatplate/compressible_SST/turb_SST_flatplate.cfg
index b2f9c515502f..122ce950cd01 100644
--- a/TestCases/wallfunctions/flatplate/compressible_SST/turb_SST_flatplate.cfg
+++ b/TestCases/wallfunctions/flatplate/compressible_SST/turb_SST_flatplate.cfg
@@ -100,5 +100,5 @@ MARKER_PLOTTING= ( wall )
 MARKER_MONITORING= ( wall )
 %
 OUTPUT_FILES= RESTART, PARAVIEW_MULTIBLOCK
-VOLUME_OUTPUT= RESIDUAL, PRIMITIVE, RESIDUAL
+VOLUME_OUTPUT= RESIDUAL, PRIMITIVE
 OUTPUT_WRT_FREQ= 1000
diff --git a/config_template.cfg b/config_template.cfg
index 970609876827..b5508130027c 100644
--- a/config_template.cfg
+++ b/config_template.cfg
@@ -1625,12 +1625,23 @@ MG_POST_SMOOTH= ( 0, 0, 0, 0 )
 % Jacobi implicit smoothing of the correction
 MG_CORRECTION_SMOOTH= ( 0, 0, 0, 0 )
 %
+% Smoothing coefficient for multigrid correction smoothing (1.25 by default)
+MG_SMOOTH_COEFF= 1.25
+%
 % Damping factor for the residual restriction
 MG_DAMP_RESTRICTION= 0.75
 %
 % Damping factor for the correction prolongation
 MG_DAMP_PROLONGATION= 0.75
 
+% Minimum number of global mesh points (finest grid) used to decide
+% whether additional multigrid levels are meaningful. During startup the
+% code may compute an "effective" number of MG levels based on the global
+% finest-grid point count and this threshold. Smaller meshes may reduce the
+% effective MG depth automatically. Default = 1000
+MG_MIN_MESHSIZE= 1000
+
+
 % -------------------------- MESH SMOOTHING -----------------------------%
 %
 % Before each computation, implicitly smooth the nodal coordinates