For nz unset in bf16&fp16 (#4495)

<!--  Thanks for sending a pull request!

BEFORE SUBMITTING, PLEASE READ
https://docs.vllm.ai/en/latest/contributing/overview.html

-->
### What this PR does / why we need it?
Disable NZ for the float-weight case. This is only a quick fix for the dev
branch.

For the main branch, we'll consider more cases to make it more general.


### Does this PR introduce _any_ user-facing change?
<!--
Note that it means *any* user-facing change including all aspects such
as API, interface or other behavior changes.
Documentation-only updates are not considered user-facing changes.
-->

### How was this patch tested?
qwen2.5 32B
<img width="441" height="221" alt="image"
src="https://github.com/user-attachments/assets/7ae18ffd-1ce2-43d9-9960-be45250ad0da"
/>

---------

Signed-off-by: 刘哲续 <liuzhexu1@huawei.com>
Co-authored-by: 刘哲续 <liuzhexu1@huawei.com>
This commit is contained in:
henryxuxu0716
2025-11-28 17:32:25 +08:00
committed by GitHub
parent 96c362361e
commit 71acc8ddeb
10 changed files with 16 additions and 14 deletions

View File

@@ -71,13 +71,16 @@ def is_310p():
return _IS_310P
def is_enable_nz(vllm_config: Optional[VllmConfig] = None) -> bool:
def is_enable_nz(dtype: Optional[torch.dtype] = torch.int8,
                 vllm_config: Optional[VllmConfig] = None) -> bool:
    """Return whether the NZ weight format should be enabled.

    The environment-driven flag is resolved once and cached in the module
    global ``_ENABLE_NZ``. Regardless of that flag, NZ is always reported
    as disabled for float16/bfloat16 weights (NZ is only meant for the
    quantized int8 weight path).

    Args:
        dtype: Weight dtype being queried. Defaults to ``torch.int8``.
        vllm_config: Required on the first call, while ``_ENABLE_NZ`` is
            still unresolved; ignored afterwards.

    Returns:
        True if NZ should be enabled for this dtype, False otherwise.

    Raises:
        ValueError: If ``_ENABLE_NZ`` is unresolved and no ``vllm_config``
            was supplied to resolve it.
    """
    global _ENABLE_NZ
    if _ENABLE_NZ is None:
        if not vllm_config:
            raise ValueError(
                "vllm_config must be provided when _ENABLE_NZ is None")
        # NZ is opt-in via the env var and not supported for qwen3_next.
        _ENABLE_NZ = envs_ascend.VLLM_ASCEND_ENABLE_NZ and vllm_config.model_config.hf_config.model_type != "qwen3_next"
    # Float weights never use NZ, even when the cached flag is enabled.
    # NOTE: this check intentionally runs after the cache is populated so
    # the first-call caching side effect is preserved for all dtypes.
    if dtype in (torch.float16, torch.bfloat16):
        return False
    return _ENABLE_NZ