[1/N][Refactor][Quantization] remove redundant quantizer class (#2680)
### What this PR does / why we need it?
AscendQuantizer/LLMQuantizer class is used to select quant method based
on quant config and some other arguments,
but it is more simple and clean replacing these classes with map. So i
remove them.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
ut and e2e test
- vLLM version: v0.10.1.1
- vLLM main:
6997a25ac6
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
@@ -156,33 +156,22 @@ class TestAscendKVCacheMethod(TestBase):
|
||||
def setUp(self):
|
||||
# Setup common test fixtures
|
||||
self.mock_quant_config = MagicMock(spec=AscendQuantConfig)
|
||||
self.mock_quant_config.quant_description = {"some_config": "value"}
|
||||
self.prefix = "attention_layer"
|
||||
self.mock_quant_config.quant_description = {"kv_quant_type": "C8"}
|
||||
self.prefix = "layer.attn"
|
||||
|
||||
# Mock the quantizer and quant_method
|
||||
self.mock_quantizer = MagicMock()
|
||||
# Mock quant_method
|
||||
self.mock_quant_method = MagicMock()
|
||||
|
||||
# Patch the AscendQuantizer
|
||||
self.quantizer_patcher = patch(
|
||||
'vllm_ascend.quantization.quant_config.AscendQuantizer.get_quantizer',
|
||||
return_value=self.mock_quantizer)
|
||||
self.mock_get_quantizer = self.quantizer_patcher.start()
|
||||
|
||||
self.mock_quantizer.build_attention_method.return_value = self.mock_quant_method
|
||||
self.patcher = patch(
|
||||
'vllm_ascend.quantization.quant_config.get_quant_method')
|
||||
self.mock_get_quant_method = self.patcher.start()
|
||||
self.mock_get_quant_method.return_value = self.mock_quant_method
|
||||
|
||||
# Create instance
|
||||
self.kv_cache_method = AscendKVCacheMethod(self.mock_quant_config,
|
||||
self.prefix)
|
||||
|
||||
def tearDown(self):
|
||||
self.quantizer_patcher.stop()
|
||||
|
||||
def test_init(self):
|
||||
"""Test initialization with proper quantizer setup."""
|
||||
self.mock_get_quantizer.assert_called_once_with(
|
||||
self.mock_quant_config.quant_description, self.prefix)
|
||||
self.mock_quantizer.build_attention_method.assert_called_once()
|
||||
self.patcher.stop()
|
||||
|
||||
def test_create_weights(self):
|
||||
"""Test create_weights delegates to quant_method."""
|
||||
|
||||
Reference in New Issue
Block a user