Revert "[Feature][Quant] Auto-detect quantization format from model f… (#6873)

This reverts commit 3953dcf784 to keep the basic functions available.

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
2026-03-10 11:27:32 +08:00
parent 40f7d93f1a
commit 33234aa0c5
7 changed files with 12 additions and 584 deletions
--- a/tests/ut/test_platform.py
+++ b/tests/ut/test_platform.py
@@ -125,14 +125,13 @@ class TestNPUPlatform(TestBase):
        self.assertIsNone(self.platform.inference_mode())
        mock_inference_mode.assert_called_once()

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.utils.update_aclgraph_sizes")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("os.environ", {})
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
    def test_check_and_update_config_basic_config_update(
-        self, mock_init_recompute, mock_soc_version, mock_update_acl, mock_init_ascend, mock_auto_detect
+        self, mock_init_recompute, mock_soc_version, mock_update_acl, mock_init_ascend
    ):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
@@ -156,12 +155,11 @@ class TestNPUPlatform(TestBase):

        mock_init_ascend.assert_called_once_with(vllm_config)

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
    def test_check_and_update_config_no_model_config_warning(
-        self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
+        self, mock_init_recompute, mock_init_ascend, mock_soc_version
    ):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
@@ -183,11 +181,10 @@ class TestNPUPlatform(TestBase):

        self.assertTrue("Model config is missing" in cm.output[0])

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
-    def test_check_and_update_config_enforce_eager_mode(self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect):
+    def test_check_and_update_config_enforce_eager_mode(self, mock_init_recompute, mock_init_ascend, mock_soc_version):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
        vllm_config.model_config.enforce_eager = True
@@ -218,12 +215,11 @@ class TestNPUPlatform(TestBase):
            CUDAGraphMode.NONE,
        )

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
    def test_check_and_update_config_unsupported_compilation_level(
-        self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
+        self, mock_init_recompute, mock_init_ascend, mock_soc_version
    ):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
@@ -257,10 +253,9 @@ class TestNPUPlatform(TestBase):
            )

    @pytest.mark.skip("Revert me when vllm support setting cudagraph_mode on oot platform")
-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("vllm_ascend.ascend_config.init_ascend_config")
-    def test_check_and_update_config_unsupported_cudagraph_mode(self, mock_init_ascend, mock_soc_version, mock_auto_detect):
+    def test_check_and_update_config_unsupported_cudagraph_mode(self, mock_init_ascend, mock_soc_version):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
        vllm_config.model_config.enforce_eager = False
@@ -282,12 +277,11 @@ class TestNPUPlatform(TestBase):
                CUDAGraphMode.NONE,
            )

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
    def test_check_and_update_config_cache_config_block_size(
-        self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
+        self, mock_init_recompute, mock_init_ascend, mock_soc_version
    ):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
@@ -307,12 +301,11 @@ class TestNPUPlatform(TestBase):

        self.assertEqual(vllm_config.cache_config.block_size, 128)

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
    def test_check_and_update_config_v1_worker_class_selection(
-        self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
+        self, mock_init_recompute, mock_init_ascend, mock_soc_version
    ):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
@@ -343,11 +336,10 @@ class TestNPUPlatform(TestBase):
            "vllm_ascend.xlite.xlite_worker.XliteWorker",
        )

-    @patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
    @patch("vllm_ascend.ascend_config.init_ascend_config")
    @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P)
    @patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
-    def test_check_and_update_config_310p_no_custom_ops(self, mock_init_recompute, mock_soc_version, mock_init_ascend, mock_auto_detect):
+    def test_check_and_update_config_310p_no_custom_ops(self, mock_init_recompute, mock_soc_version, mock_init_ascend):
        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config()
        vllm_config = TestNPUPlatform.mock_vllm_config()
        vllm_config.compilation_config.custom_ops = []