Support updating expert locations dynamically (#6388)

This commit is contained in:
fzyzcjy
2025-05-22 12:59:33 +08:00
committed by GitHub
parent 121f92c583
commit fc992a09f9
5 changed files with 723 additions and 0 deletions

View File

@@ -317,6 +317,13 @@ class DeepseekV2MoE(nn.Module):
def _enable_deepep_moe(self):
    """Return True when DeepEP MoE mode is switched on in the server args."""
    server_args = global_server_args_dict
    return server_args["enable_deepep_moe"]
def get_moe_weights(self):
    """Collect the routed-expert weight tensors of this MoE layer.

    Walks ``self.experts.named_parameters()`` and returns the raw ``.data``
    tensors, skipping ``correction_bias`` (it is not an expert weight that
    should be relocated).
    """
    excluded = ("correction_bias",)
    weights = []
    for param_name, param in self.experts.named_parameters():
        if param_name in excluded:
            continue
        weights.append(param.data)
    return weights
def op_gate(self, state):
if (not self._enable_deepep_moe) or is_non_idle_and_non_empty(
state.forward_batch.forward_mode, state.hidden_states_mlp_input
@@ -1599,6 +1606,14 @@ class DeepseekV2ForCausalLM(nn.Module):
self_attn.w_vc = w_vc.contiguous()
self_attn.use_deep_gemm_bmm = True
# TODO support nextn later
if not is_nextn:
self.routed_experts_weights_of_layer = {
layer_id: layer.mlp.get_moe_weights()
for layer_id, layer in enumerate(self.model.layers)
if isinstance(layer.mlp, DeepseekV2MoE)
}
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=False):
if is_nextn:
if hasattr(self.config, "num_nextn_predict_layers"):