From 7569e5202ae1862e47998bacd06dc82ed212c84e Mon Sep 17 00:00:00 2001
From: Kirthi Shankar Sivamani
Date: Thu, 1 Jan 2026 14:31:43 +0000
Subject: [PATCH] Fix barrier ID

Signed-off-by: Kirthi Shankar Sivamani
---
 .../group_row_cast_col_hadamard_transform_cast_fusion.cu | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/transformer_engine/common/hadamard_transform/group_row_cast_col_hadamard_transform_cast_fusion.cu b/transformer_engine/common/hadamard_transform/group_row_cast_col_hadamard_transform_cast_fusion.cu
index 3932b328ae..175217a3c2 100644
--- a/transformer_engine/common/hadamard_transform/group_row_cast_col_hadamard_transform_cast_fusion.cu
+++ b/transformer_engine/common/hadamard_transform/group_row_cast_col_hadamard_transform_cast_fusion.cu
@@ -983,7 +983,9 @@ __launch_bounds__(512, 1) __global__ static void group_row_col_rht_gemm_device(
       Tensor tQAgSFA = thr_r2g_SFA.partition_S(gSFA_mn);
       Tensor tQArSFA = make_tensor_like(tQAgSFA(_, _, _, _0{}, _0{}));
 
-      int row_quant_barrier_id = 10;
+      // Will result in barrier_id=10 passed to bar.sync instr as cutlass adds 8
+      // in order to go over the reserved named barrier count.
+      constexpr int row_quant_barrier_id = 2;
       cutlass::arch::NamedBarrier::sync(NumEpilogueRowQuantThreadCount, row_quant_barrier_id);
 
       int group_idx = GetGroupIdx(&args, scheduler.tile_n_base() * size<1>(epilogue_tiler));