[0.11.0] [Cherry-pick #4058] Fixes Qwen3-Next enable nz accuracy problem (#4056)

### What this PR does / why we need it?
- Fixes an accuracy problem in Qwen3-Next that occurs when NZ is enabled (`_ENABLE_NZ`).

---------

Signed-off-by: wxsIcey <1790571317@qq.com>
Signed-off-by: Icey <1790571317@qq.com>
This commit is contained in:
Icey
2025-11-10 20:56:39 +08:00
committed by GitHub
parent ebd45b6596
commit c5fe179cef
7 changed files with 37 additions and 9 deletions

View File

@@ -60,6 +60,7 @@ class TestAscendW4A8DynamicLinearMethod(TestBase):
self.assertEqual(params["scale_bias"].dtype, torch.float32)
self.assertEqual(params["scale_bias"].shape, (32, 16))
@patch('vllm_ascend.utils._ENABLE_NZ', True)
@patch('torch_npu.npu_convert_weight_to_int4pack')
@patch('torch.Tensor.npu')
def test_process_weights_after_loading(self, mock_npu,
@@ -260,6 +261,7 @@ class TestAscendW4A8DynamicFusedMoEMethod(TestBase):
requires_grad=False)
return layer
@patch('vllm_ascend.utils._ENABLE_NZ', True)
@patch('torch_npu.npu_format_cast')
@patch('torch_npu.npu_quantize')
@patch('torch.Tensor.npu')