Fix 2-gpu CI test and suppress some warnings (#4930)

This commit is contained in:
Lianmin Zheng
2025-03-30 12:51:44 -07:00
committed by GitHub
parent f842853a40
commit 9adf178cc2
4 changed files with 21 additions and 19 deletions

View File

@@ -30,9 +30,6 @@ from sglang.srt.distributed import (
tensor_model_parallel_all_reduce,
)
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope import (
decode_attention_fwd_grouped_rope,
)
from sglang.srt.layers.dp_attention import (
dp_gather_partial,
dp_scatter,
@@ -83,6 +80,11 @@ if _is_cuda:
else:
from vllm import _custom_ops as ops
if _is_hip:
from sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope import (
decode_attention_fwd_grouped_rope,
)
expert_distribution_recorder = ExpertDistributionRecorder()