fix: temporary solution for DeepSeek V2 H100 layout conversion issue (#1060)

Co-authored-by: ispobock <ISPObaoke@163.com>
This commit is contained in:
Yineng Zhang
2024-08-13 13:48:54 +08:00
committed by GitHub
parent 162f3ccb01
commit 65915f9f3e

View File

@@ -275,7 +275,9 @@ def extend_attention_fwd(
BLOCK_DPE = 0
BLOCK_DV = Lv
-if CUDA_CAPABILITY[0] >= 8:
+if CUDA_CAPABILITY[0] >= 9:
+BLOCK_M, BLOCK_N = (128, 64)
+elif CUDA_CAPABILITY[0] >= 8:
BLOCK_M, BLOCK_N = (128, 128) if Lq <= 128 else (64, 64)
else:
BLOCK_M, BLOCK_N = (64, 64) if Lq <= 128 else (32, 32)