Fix the condition that decides whether to enable the Deepseek V3/R1 shared experts fusion optimization (#7371)
This commit is contained in:
@@ -1731,12 +1731,12 @@ class DeepseekV2ForCausalLM(nn.Module):
|
|||||||
disable_reason = None
|
disable_reason = None
|
||||||
if (
|
if (
|
||||||
not _is_cuda
|
not _is_cuda
|
||||||
or torch.cuda.get_device_capability("cuda") < (9, 0)
|
or torch.cuda.get_device_capability("cuda") < (8, 0)
|
||||||
or self.config.architectures[0] != architecture
|
or self.config.architectures[0] != architecture
|
||||||
or self.config.n_routed_experts != 256
|
or self.config.n_routed_experts != 256
|
||||||
or self.config.n_shared_experts != 1
|
or self.config.n_shared_experts != 1
|
||||||
):
|
):
|
||||||
disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 90 can use shared experts fusion optimization."
|
disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 80 can use shared experts fusion optimization."
|
||||||
elif (
|
elif (
|
||||||
global_server_args_dict["enable_deepep_moe"]
|
global_server_args_dict["enable_deepep_moe"]
|
||||||
or global_server_args_dict["enable_ep_moe"]
|
or global_server_args_dict["enable_ep_moe"]
|
||||||
@@ -2040,7 +2040,7 @@ class DeepseekV2ForCausalLM(nn.Module):
|
|||||||
|
|
||||||
if self.num_fused_shared_experts > 0:
|
if self.num_fused_shared_experts > 0:
|
||||||
assert self.num_fused_shared_experts == 1
|
assert self.num_fused_shared_experts == 1
|
||||||
logger.info("Shared experts fusion optimization enabled.")
|
log_info_on_rank0(logger, "Shared experts fusion optimization enabled.")
|
||||||
|
|
||||||
params_dict = dict(self.named_parameters())
|
params_dict = dict(self.named_parameters())
|
||||||
weight_names = []
|
weight_names = []
|
||||||
|
|||||||
Reference in New Issue
Block a user