[Fix] ensure DeepGEMM is only enabled for FP8_W8A8 models (#8110)

This commit is contained in:
hzh0425
2025-07-17 12:33:29 +08:00
committed by GitHub
parent 9069884b51
commit 5c08a36cbf

View File

@@ -1272,6 +1272,12 @@ class DeepEPMoE(EPMoE):
routed_scaling_factor=routed_scaling_factor,
)
self.deepep_mode = deepep_mode
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
assert self.use_fp8_w8a8, (
"DeepGEMM requires an fp8_w8a8 model; "
"alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable."
)
if self.deepep_mode.enable_low_latency():
assert (
deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM