Refactor Qwen MoE code and use communicator to support TP+DP (#6581)

This commit is contained in:
Yi Zhang
2025-05-26 14:01:10 +08:00
committed by GitHub
parent fc419b62e8
commit 65f091310c
5 changed files with 79 additions and 380 deletions

View File

@@ -95,6 +95,7 @@ from sglang.srt.utils import (
get_int_env_var,
is_cuda,
is_hip,
is_non_idle_and_non_empty,
log_info_on_rank0,
)
@@ -206,14 +207,6 @@ class MoEGate(nn.Module):
return logits
def is_non_idle_and_non_empty(forward_mode, hidden_states):
    """Return True when there is real work to run for this forward pass.

    All three conditions must hold:
      * ``forward_mode`` is not ``None``;
      * ``forward_mode.is_idle()`` is falsy;
      * ``hidden_states.shape[0]`` is greater than zero.

    Args:
        forward_mode: Object exposing an ``is_idle()`` method, or ``None``.
            NOTE(review): presumably a ``ForwardMode`` value -- confirm
            against the callers.
        hidden_states: Object exposing a ``shape`` sequence whose first
            entry is compared against zero.
            NOTE(review): presumably a tensor whose leading dimension is
            the token count -- confirm against the callers.

    Returns:
        bool: ``True`` only if every condition above holds.
    """
    # `and` short-circuits left to right: when `forward_mode` is None neither
    # `is_idle()` nor `hidden_states.shape` is touched, so callers may pass
    # None for both without triggering an AttributeError.
    return (
        (forward_mode is not None)
        and not forward_mode.is_idle()
        and hidden_states.shape[0] > 0
    )
class DeepseekV2MoE(nn.Module):
def __init__(