Fix bug of deepseek-v3 under DP+EP mode with large batchsize/seqlen (#6449)

This commit is contained in:
likesen-alibaba
2025-07-10 16:19:56 +08:00
committed by GitHub
parent 5748241549
commit 4a0d19198b
2 changed files with 6 additions and 6 deletions

View File

@@ -160,8 +160,8 @@ def _per_token_group_quant_fp8_colmajor(
"""
# Map the program id to the row of X and Y it should compute.
g_id = tl.program_id(0)
y_ptr += g_id * group_size
y_q_ptr += g_id * group_size
y_ptr += g_id.to(tl.int64) * group_size
y_q_ptr += g_id.to(tl.int64) * group_size
# Convert g_id, the flattened block coordinate, to 2D so we can index
# into the output y_scales matrix