[Ascend]optimize Qwen3 on Ascend (#10574)

Co-authored-by: c30031083 <chenxu140@huawei.com>
This commit is contained in:
ronnie_zheng
2025-09-23 03:18:36 +03:00
committed by GitHub
parent 095093ee5a
commit e22f3a5ec9
6 changed files with 81 additions and 2 deletions

View File

@@ -638,6 +638,7 @@ class NPU_W8A8LinearMethodImpl:
layer.weight.data = layer.weight.data.transpose(0, 1).contiguous()
layer.weight_scale.data = torch.flatten(layer.weight_scale.data)
layer.weight_offset.data = torch.flatten(layer.weight_offset.data)
layer.weight.data = torch_npu.npu_format_cast(layer.weight.data, 29)
class NPU_W8A8LinearMethodMTImpl:
@@ -830,6 +831,7 @@ class NPU_W8A8DynamicLinearMethodImpl:
layer.weight_scale.data = layer.weight_scale.data.flatten()
layer.weight_scale_fp32 = layer.weight_scale.data.to(torch.float32)
layer.weight_offset.data = layer.weight_offset.data.flatten()
layer.weight.data = torch_npu.npu_format_cast(layer.weight.data, 29)
class NPU_W8A8DynamicLinearMethod(LinearMethodBase):