[1/N][Refactor][Quantization] remove redundant quantizer class (#2680)

### What this PR does / why we need it? The AscendQuantizer/LLMQuantizer classes are used to select the quant method based on the quant config and some other arguments, but replacing these classes with a map is simpler and cleaner, so this PR removes them. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT and e2e tests - vLLM version: v0.10.1.1 - vLLM main: 6997a25ac6 Signed-off-by: 22dimensions <waitingwind@foxmail.com>
2025-09-04 11:35:14 +08:00
parent d4370ebc42
commit 37f5a29cd4
10 changed files with 321 additions and 554 deletions
--- a/tests/ut/quantization/test_utils.py
+++ b/tests/ut/quantization/test_utils.py
@@ -0,0 +1,82 @@
+import types
+
+from tests.ut.base import TestBase
+from vllm_ascend.quantization.utils import (ASCEND_QUANTIZATION_METHOD_MAP,
+                                            get_quant_method)
+
+
+class TestGetQuantMethod(TestBase):
+    """Exercise get_quant_method against a stubbed method map.
+
+    For the duration of each test, every class registered in
+    ASCEND_QUANTIZATION_METHOD_MAP is replaced by an empty stand-in class,
+    so the assertions only depend on which map entry the result comes from.
+    """
+
+    def setUp(self):
+        """Replace every registered class with a freshly created stub."""
+        # A shallow snapshot is sufficient because the original inner dicts
+        # are never mutated: each quant type is pointed at a brand-new inner
+        # dict of stubs while the snapshot keeps the untouched originals.
+        self.original_quantization_method_map = dict(
+            ASCEND_QUANTIZATION_METHOD_MAP)
+        for quant_type, layer_map in (
+                self.original_quantization_method_map.items()):
+            ASCEND_QUANTIZATION_METHOD_MAP[quant_type] = {
+                layer_type: types.new_class(f"{quant_type}_{layer_type}")
+                for layer_type in layer_map
+            }
+
+    def tearDown(self):
+        """Put the original inner dicts back into the shared map."""
+        ASCEND_QUANTIZATION_METHOD_MAP.clear()
+        ASCEND_QUANTIZATION_METHOD_MAP.update(
+            self.original_quantization_method_map)
+
+    def test_linear_quant_methods(self):
+        """Every quant type with a linear entry returns that entry's class."""
+        for quant_type, layer_map in ASCEND_QUANTIZATION_METHOD_MAP.items():
+            if "linear" not in layer_map:
+                continue
+            # subTest reports the failing quant type instead of stopping early
+            with self.subTest(quant_type=quant_type):
+                method = get_quant_method({"linear_layer.weight": quant_type},
+                                          "linear_layer", "linear")
+                self.assertIsInstance(method, layer_map["linear"])
+
+    def test_moe_quant_methods(self):
+        """Every quant type with a moe entry returns that entry's class."""
+        for quant_type, layer_map in ASCEND_QUANTIZATION_METHOD_MAP.items():
+            if "moe" not in layer_map:
+                continue
+            # subTest reports the failing quant type instead of stopping early
+            with self.subTest(quant_type=quant_type):
+                method = get_quant_method({"layer.weight": quant_type},
+                                          "layer", "moe")
+                self.assertIsInstance(method, layer_map["moe"])
+
+    def test_with_fa_quant_type(self):
+        """An fa_quant_type of C8 selects the C8 attention entry."""
+        quant_description = {"fa_quant_type": "C8"}
+        method = get_quant_method(quant_description, ".attn", "attention")
+        self.assertIsInstance(
+            method, ASCEND_QUANTIZATION_METHOD_MAP["C8"]["attention"])
+
+    def test_with_kv_quant_type(self):
+        """A kv_quant_type of C8 selects the C8 attention entry."""
+        quant_description = {"kv_quant_type": "C8"}
+        method = get_quant_method(quant_description, ".attn", "attention")
+        self.assertIsInstance(
+            method, ASCEND_QUANTIZATION_METHOD_MAP["C8"]["attention"])
+
+    def test_invalid_layer_type(self):
+        """An unknown layer type raises NotImplementedError."""
+        quant_description = {"linear_layer.weight": "W8A8"}
+        with self.assertRaises(NotImplementedError):
+            get_quant_method(quant_description, "linear_layer", "unsupported")
+
+    def test_invalid_quant_type(self):
+        """An unknown quant type raises NotImplementedError."""
+        quant_description = {"linear_layer.weight": "UNKNOWN"}
+        with self.assertRaises(NotImplementedError):
+            get_quant_method(quant_description, "linear_layer", "linear")