diff --git a/python/sglang/srt/layers/attention/triton_ops/extend_attention.py b/python/sglang/srt/layers/attention/triton_ops/extend_attention.py index 74ad872bc..608f9bab0 100644 --- a/python/sglang/srt/layers/attention/triton_ops/extend_attention.py +++ b/python/sglang/srt/layers/attention/triton_ops/extend_attention.py @@ -329,8 +329,8 @@ def extend_attention_fwd( BLOCK_DV = triton.next_power_of_2(Lv) if is_hip_: - BLOCK_M, BLOCK_N = (32, 32) - num_warps = 2 + BLOCK_M, BLOCK_N = (64, 64) + num_warps = 4 else: if is_cuda_available and CUDA_CAPABILITY[0] >= 9: