Fix error when disabling new DeepGEMM (#7198)
This commit is contained in:
@@ -584,8 +584,10 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
|
|||||||
use_fp8=use_fp8,
|
use_fp8=use_fp8,
|
||||||
async_finish=not self.return_recv_hook,
|
async_finish=not self.return_recv_hook,
|
||||||
return_recv_hook=self.return_recv_hook,
|
return_recv_hook=self.return_recv_hook,
|
||||||
round_scale=deep_gemm_wrapper.DEEPGEMM_V202506,
|
round_scale=deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
|
||||||
use_ue8m0=deep_gemm_wrapper.DEEPGEMM_V202506,
|
and deep_gemm_wrapper.DEEPGEMM_V202506,
|
||||||
|
use_ue8m0=deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
|
||||||
|
and deep_gemm_wrapper.DEEPGEMM_V202506,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return packed_recv_hidden, packed_recv_count, event, hook
|
return packed_recv_hidden, packed_recv_count, event, hook
|
||||||
|
|||||||
@@ -1914,7 +1914,10 @@ class DeepseekV2ForCausalLM(nn.Module):
|
|||||||
self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
|
self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
|
||||||
self_attn.use_deep_gemm_bmm = True
|
self_attn.use_deep_gemm_bmm = True
|
||||||
|
|
||||||
if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0:
|
if (
|
||||||
|
deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
|
||||||
|
and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
|
||||||
|
):
|
||||||
self._weight_requant_ue8m0()
|
self._weight_requant_ue8m0()
|
||||||
|
|
||||||
def _weight_requant_ue8m0(self):
|
def _weight_requant_ue8m0(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user