[1/N][Refactor][Quantization] remove redundant quantizer class (#2680)

### What this PR does / why we need it?

The AscendQuantizer/LLMQuantizer classes are used to select the quant method
based on the quant config and some other arguments,
but it is simpler and cleaner to replace these classes with a map, so I
removed them.

### Does this PR introduce _any_ user-facing change?
No 

### How was this patch tested?

UT and e2e tests


- vLLM version: v0.10.1.1
- vLLM main:
6997a25ac6

Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
22dimensions
2025-09-04 11:35:14 +08:00
committed by GitHub
parent d4370ebc42
commit 37f5a29cd4
10 changed files with 321 additions and 554 deletions

View File

@@ -156,33 +156,22 @@ class TestAscendKVCacheMethod(TestBase):
def setUp(self):
# Setup common test fixtures
self.mock_quant_config = MagicMock(spec=AscendQuantConfig)
self.mock_quant_config.quant_description = {"some_config": "value"}
self.prefix = "attention_layer"
self.mock_quant_config.quant_description = {"kv_quant_type": "C8"}
self.prefix = "layer.attn"
# Mock the quantizer and quant_method
self.mock_quantizer = MagicMock()
# Mock quant_method
self.mock_quant_method = MagicMock()
# Patch the AscendQuantizer
self.quantizer_patcher = patch(
'vllm_ascend.quantization.quant_config.AscendQuantizer.get_quantizer',
return_value=self.mock_quantizer)
self.mock_get_quantizer = self.quantizer_patcher.start()
self.mock_quantizer.build_attention_method.return_value = self.mock_quant_method
self.patcher = patch(
'vllm_ascend.quantization.quant_config.get_quant_method')
self.mock_get_quant_method = self.patcher.start()
self.mock_get_quant_method.return_value = self.mock_quant_method
# Create instance
self.kv_cache_method = AscendKVCacheMethod(self.mock_quant_config,
self.prefix)
def tearDown(self):
self.quantizer_patcher.stop()
def test_init(self):
"""Test initialization with proper quantizer setup."""
self.mock_get_quantizer.assert_called_once_with(
self.mock_quant_config.quant_description, self.prefix)
self.mock_quantizer.build_attention_method.assert_called_once()
self.patcher.stop()
def test_create_weights(self):
"""Test create_weights delegates to quant_method."""