[refactor] Remove unnecessary attributes from set_ascend_forward_context (#5204)
### What this PR does / why we need it?
Remove unnecessary attributes from `set_ascend_forward_context`:
1. `prefetch_stream`
2. `weight_prefetch_method`
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: Wang Kunpeng <1289706727@qq.com>
This commit is contained in:
@@ -63,21 +63,18 @@ class TestAscendW8A8LinearMethod(TestBase):
|
||||
self.assertEqual(params['weight_scale'].shape, (10, 1))
|
||||
self.assertEqual(params['weight_offset'].shape, (10, 1))
|
||||
|
||||
@patch("vllm_ascend.quantization.w8a8.get_forward_context")
|
||||
@patch("vllm_ascend.quantization.w8a8.get_weight_prefetch_method")
|
||||
@patch("torch.ops.vllm.quantize")
|
||||
@patch("torch_npu.npu_quant_matmul")
|
||||
def test_apply_with_x_not_int8(self, mock_npu_quant_matmul, mock_quantize,
|
||||
mock_get_forward_context):
|
||||
mock_get_weight_prefetch_method):
|
||||
layer = MagicMock()
|
||||
layer.aclnn_input_scale = 0.1
|
||||
layer.aclnn_input_offset = 0.2
|
||||
layer.weight = torch.randn(128, 256)
|
||||
layer.deq_scale = 0.3
|
||||
|
||||
mock_forward_context = MagicMock()
|
||||
mock_get_forward_context.return_value = mock_forward_context
|
||||
mock_weight_prefetch_method = MagicMock()
|
||||
mock_forward_context.weight_prefetch_method = mock_weight_prefetch_method
|
||||
mock_get_weight_prefetch_method.return_value = MagicMock()
|
||||
|
||||
x = torch.randn(32, 128)
|
||||
bias = torch.randn(256)
|
||||
|
||||
Reference in New Issue
Block a user