refactor qwen moe code, use communicator to support tp+dp (#6581)
This commit is contained in:
@@ -95,6 +95,7 @@ from sglang.srt.utils import (
|
||||
get_int_env_var,
|
||||
is_cuda,
|
||||
is_hip,
|
||||
is_non_idle_and_non_empty,
|
||||
log_info_on_rank0,
|
||||
)
|
||||
|
||||
@@ -206,14 +207,6 @@ class MoEGate(nn.Module):
|
||||
return logits
|
||||
|
||||
|
||||
def is_non_idle_and_non_empty(forward_mode, hidden_states) -> bool:
    """Report whether there is real work to do for this forward pass.

    Args:
        forward_mode: Object exposing ``is_idle()``, or ``None``.
        hidden_states: Object exposing ``shape``; only ``shape[0]`` is read.

    Returns:
        ``True`` only when ``forward_mode`` is not ``None``,
        ``forward_mode.is_idle()`` is falsy, and ``hidden_states.shape[0]``
        is greater than zero; ``False`` otherwise.
    """
    # The ``None`` test comes first so ``is_idle()`` is never called on
    # ``None``; the checks short-circuit in this order.
    return (
        (forward_mode is not None)
        and not forward_mode.is_idle()
        and hidden_states.shape[0] > 0
    )
|
||||
|
||||
|
||||
class DeepseekV2MoE(nn.Module):
|
||||
|
||||
def __init__(
|
||||
|
||||
Reference in New Issue
Block a user