Unset NZ for bf16 & fp16 weights (#4495)

<!--  Thanks for sending a pull request!

BEFORE SUBMITTING, PLEASE READ
https://docs.vllm.ai/en/latest/contributing/overview.html

-->
### What this PR does / why we need it?
Disable NZ for the float-weight (bf16/fp16) case. This is only a quick fix for the dev
branch.

For the main branch, we'll consider more cases to make the fix more general.


### Does this PR introduce _any_ user-facing change?
<!--
Note that it means *any* user-facing change including all aspects such
as API, interface or other behavior changes.
Documentation-only updates are not considered user-facing changes.
-->

### How was this patch tested?
Tested with Qwen2.5 32B:
<img width="441" height="221" alt="image"
src="https://github.com/user-attachments/assets/7ae18ffd-1ce2-43d9-9960-be45250ad0da"
/>

---------

Signed-off-by: 刘哲续 <liuzhexu1@huawei.com>
Co-authored-by: 刘哲续 <liuzhexu1@huawei.com>
This commit is contained in:
henryxuxu0716
2025-11-28 17:32:25 +08:00
committed by GitHub
parent 96c362361e
commit 71acc8ddeb
10 changed files with 16 additions and 14 deletions

View File

@@ -284,7 +284,7 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
dim=2)
qkv_weight_final = qkv_weight_padded.reshape(-1, self.hidden_size)
if is_enable_nz():
if is_enable_nz(qkv_weight_final.dtype):
qkv_weight_final_copy = torch.empty_like(qkv_weight_final).copy_(
qkv_weight_final)
qkv_weight_final_copy = torch_npu.npu_format_cast(
@@ -300,7 +300,7 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
(0, self.half_pad_hidden_size_per_attention_head, 0, 0)).reshape(
self.hidden_size, -1)
if is_enable_nz():
if is_enable_nz(out_weight.dtype):
out_weight_copy = torch.empty_like(out_weight).copy_(out_weight)
out_weight_copy = torch_npu.npu_format_cast(
out_weight_copy, ACL_FORMAT_FRACTAL_ND)

View File

@@ -268,7 +268,7 @@ class AscendQwen2VisionTransformer(Qwen2VisionTransformer):
dim=2)
qkv_weight_final = qkv_weight_padded.reshape(-1, self.hidden_size)
if is_enable_nz():
if is_enable_nz(qkv_weight_final.dtype):
qkv_weight_final_copy = torch.empty_like(qkv_weight_final).copy_(
qkv_weight_final)
qkv_weight_final_copy = torch_npu.npu_format_cast(
@@ -284,7 +284,7 @@ class AscendQwen2VisionTransformer(Qwen2VisionTransformer):
(0, self.half_pad_hidden_size_per_attention_head, 0, 0)).reshape(
self.hidden_size, -1)
if is_enable_nz():
if is_enable_nz(out_weight.dtype):
out_weight_copy = torch.empty_like(out_weight).copy_(out_weight)
out_weight_copy = torch_npu.npu_format_cast(
out_weight_copy, ACL_FORMAT_FRACTAL_ND)