For nz unset in bf16&fp16 (#4495)

<!--  Thanks for sending a pull request!

BEFORE SUBMITTING, PLEASE READ
https://docs.vllm.ai/en/latest/contributing/overview.html

-->
### What this PR does / why we need it?
Disable NZ for the float-weight case. This is only a quick fix for the dev
branch.

For the main branch, we'll consider more cases to make it more general.


### Does this PR introduce _any_ user-facing change?
<!--
Note that it means *any* user-facing change including all aspects such
as API, interface or other behavior changes.
Documentation-only updates are not considered user-facing changes.
-->

### How was this patch tested?
qwen2.5 32B
<img width="441" height="221" alt="image"
src="https://github.com/user-attachments/assets/7ae18ffd-1ce2-43d9-9960-be45250ad0da"
/>

---------

Signed-off-by: 刘哲续 <liuzhexu1@huawei.com>
Co-authored-by: 刘哲续 <liuzhexu1@huawei.com>
This commit is contained in:
henryxuxu0716
2025-11-28 17:32:25 +08:00
committed by GitHub
parent 96c362361e
commit 71acc8ddeb
10 changed files with 16 additions and 14 deletions

View File

@@ -71,13 +71,16 @@ def is_310p():
return _IS_310P
def is_enable_nz(vllm_config: Optional[VllmConfig] = None) -> bool:
def is_enable_nz(dtype: Optional[torch.dtype] = torch.int8,
                 vllm_config: Optional[VllmConfig] = None) -> bool:
    """Return whether the NZ weight format should be enabled.

    The environment-driven flag is resolved once and cached in the module
    global ``_ENABLE_NZ``. Regardless of that flag, NZ is always reported
    as disabled for float16/bfloat16 weights (NZ is only meant for the
    quantized int8 weight path).

    Args:
        dtype: Weight dtype being queried. Defaults to ``torch.int8``.
        vllm_config: Required on the first call, while ``_ENABLE_NZ`` is
            still unresolved; ignored afterwards.

    Returns:
        True if NZ should be enabled for this dtype, False otherwise.

    Raises:
        ValueError: If ``_ENABLE_NZ`` is unresolved and no ``vllm_config``
            was supplied to resolve it.
    """
    global _ENABLE_NZ
    if _ENABLE_NZ is None:
        if not vllm_config:
            raise ValueError(
                "vllm_config must be provided when _ENABLE_NZ is None")
        # NZ is opt-in via the env var and not supported for qwen3_next.
        _ENABLE_NZ = envs_ascend.VLLM_ASCEND_ENABLE_NZ and vllm_config.model_config.hf_config.model_type != "qwen3_next"
    # Float weights never use NZ, even when the cached flag is enabled.
    # NOTE: this check intentionally runs after the cache is populated so
    # the first-call caching side effect is preserved for all dtypes.
    if dtype in (torch.float16, torch.bfloat16):
        return False
    return _ENABLE_NZ