For nz unset in bf16&fp16 (#4495)
<!-- Thanks for sending a pull request! BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html --> ### What this PR does / why we need it? disable NZ for float weight case. This is only a quick fix for dev branch. For main branch, we'll consider more case to make it more common. ### Does this PR introduce _any_ user-facing change? <!-- Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes. Documentation-only updates are not considered user-facing changes. --> ### How was this patch tested? qwen2.5 32B <img width="441" height="221" alt="image" src="https://github.com/user-attachments/assets/7ae18ffd-1ce2-43d9-9960-be45250ad0da" /> --------- Signed-off-by: 刘哲续 <liuzhexu1@huawei.com> Co-authored-by: 刘哲续 <liuzhexu1@huawei.com>
This commit is contained in:
@@ -284,7 +284,7 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
|
||||
dim=2)
|
||||
qkv_weight_final = qkv_weight_padded.reshape(-1, self.hidden_size)
|
||||
|
||||
if is_enable_nz():
|
||||
if is_enable_nz(qkv_weight_final.dtype):
|
||||
qkv_weight_final_copy = torch.empty_like(qkv_weight_final).copy_(
|
||||
qkv_weight_final)
|
||||
qkv_weight_final_copy = torch_npu.npu_format_cast(
|
||||
@@ -300,7 +300,7 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
|
||||
(0, self.half_pad_hidden_size_per_attention_head, 0, 0)).reshape(
|
||||
self.hidden_size, -1)
|
||||
|
||||
if is_enable_nz():
|
||||
if is_enable_nz(out_weight.dtype):
|
||||
out_weight_copy = torch.empty_like(out_weight).copy_(out_weight)
|
||||
out_weight_copy = torch_npu.npu_format_cast(
|
||||
out_weight_copy, ACL_FORMAT_FRACTAL_ND)
|
||||
|
||||
@@ -268,7 +268,7 @@ class AscendQwen2VisionTransformer(Qwen2VisionTransformer):
|
||||
dim=2)
|
||||
qkv_weight_final = qkv_weight_padded.reshape(-1, self.hidden_size)
|
||||
|
||||
if is_enable_nz():
|
||||
if is_enable_nz(qkv_weight_final.dtype):
|
||||
qkv_weight_final_copy = torch.empty_like(qkv_weight_final).copy_(
|
||||
qkv_weight_final)
|
||||
qkv_weight_final_copy = torch_npu.npu_format_cast(
|
||||
@@ -284,7 +284,7 @@ class AscendQwen2VisionTransformer(Qwen2VisionTransformer):
|
||||
(0, self.half_pad_hidden_size_per_attention_head, 0, 0)).reshape(
|
||||
self.hidden_size, -1)
|
||||
|
||||
if is_enable_nz():
|
||||
if is_enable_nz(out_weight.dtype):
|
||||
out_weight_copy = torch.empty_like(out_weight).copy_(out_weight)
|
||||
out_weight_copy = torch_npu.npu_format_cast(
|
||||
out_weight_copy, ACL_FORMAT_FRACTAL_ND)
|
||||
|
||||
Reference in New Issue
Block a user