[refactor] Remove unnecessary attributes from set_ascend_forward_context (#5204)
### What this PR does / why we need it?
Remove unnecessary attributes from `set_ascend_forward_context`:
1. `prefetch_stream`
2. `weight_prefetch_method`
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: Wang Kunpeng <1289706727@qq.com>
This commit is contained in:
@@ -63,21 +63,18 @@ class TestAscendW8A8LinearMethod(TestBase):
|
||||
self.assertEqual(params['weight_scale'].shape, (10, 1))
|
||||
self.assertEqual(params['weight_offset'].shape, (10, 1))
|
||||
|
||||
@patch("vllm_ascend.quantization.w8a8.get_forward_context")
|
||||
@patch("vllm_ascend.quantization.w8a8.get_weight_prefetch_method")
|
||||
@patch("torch.ops.vllm.quantize")
|
||||
@patch("torch_npu.npu_quant_matmul")
|
||||
def test_apply_with_x_not_int8(self, mock_npu_quant_matmul, mock_quantize,
|
||||
mock_get_forward_context):
|
||||
mock_get_weight_prefetch_method):
|
||||
layer = MagicMock()
|
||||
layer.aclnn_input_scale = 0.1
|
||||
layer.aclnn_input_offset = 0.2
|
||||
layer.weight = torch.randn(128, 256)
|
||||
layer.deq_scale = 0.3
|
||||
|
||||
mock_forward_context = MagicMock()
|
||||
mock_get_forward_context.return_value = mock_forward_context
|
||||
mock_weight_prefetch_method = MagicMock()
|
||||
mock_forward_context.weight_prefetch_method = mock_weight_prefetch_method
|
||||
mock_get_weight_prefetch_method.return_value = MagicMock()
|
||||
|
||||
x = torch.randn(32, 128)
|
||||
bias = torch.randn(256)
|
||||
|
||||
Reference in New Issue
Block a user