fix nz for quantization (#4943)

Quantization ops are forced to rely on the NZ format, so we should remove the NZ check for them.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-12 14:54:41 +08:00
committed by GitHub
parent 5932abc446
commit 01a13a9b77
4 changed files with 11 additions and 14 deletions

View File

@@ -81,10 +81,9 @@ def is_enable_nz(dtype: Optional[torch.dtype] = torch.int8,
"vllm_config must be provided when _ENABLE_NZ is None")
_ENABLE_NZ = envs_ascend.VLLM_ASCEND_ENABLE_NZ and vllm_config.model_config.hf_config.model_type != "qwen3_next"
_IS_EAGLE_MODE = (
vllm_config.speculative_config is not None and
getattr(vllm_config.speculative_config, 'method', None) in ("eagle", "eagle3")
)
_IS_EAGLE_MODE = (vllm_config.speculative_config is not None
and getattr(vllm_config.speculative_config, 'method',
None) in ("eagle", "eagle3"))
if dtype in [torch.float16, torch.bfloat16]:
return _ENABLE_NZ if _IS_EAGLE_MODE else False