Revert "[Feat] Unquantized linear nz support (#2619)" (#2896)

### What this PR does / why we need it?
This reverts commit 7b2ecc1e9a.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed

- vLLM version: main
- vLLM main: 64d90c3e4f

Closes: https://github.com/vllm-project/vllm-ascend/issues/2890
Closes: https://github.com/vllm-project/vllm-ascend/issues/2887
Closes: https://github.com/vllm-project/vllm-ascend/issues/2885

Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
2025-09-12 20:51:12 +08:00
parent fc2bcbe21c
commit 756b8a1946
4 changed files with 10 additions and 111 deletions
--- a/tests/ut/quantization/test_quant_config.py
+++ b/tests/ut/quantization/test_quant_config.py
@@ -4,10 +4,10 @@ import torch
 from vllm.attention.layer import Attention
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig
-from vllm.model_executor.layers.linear import LinearBase
+from vllm.model_executor.layers.linear import (LinearBase,
+                                               UnquantizedLinearMethod)

 from tests.ut.base import TestBase
-from vllm_ascend.ops.linear import AscendUnquantizedLinearMethod
 from vllm_ascend.quantization.quant_config import (AscendKVCacheMethod,
                                                   AscendQuantConfig)
 from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
@@ -79,7 +79,7 @@ class TestAscendQuantConfig(TestBase):
                          'is_layer_skipped_ascend',
                          return_value=True):
            method = self.ascend_config.get_quant_method(linear_layer, ".attn")
-            self.assertIsInstance(method, AscendUnquantizedLinearMethod)
+            self.assertIsInstance(method, UnquantizedLinearMethod)

        # Test quantized layer
        with patch.object(self.ascend_config, 'is_layer_skipped_ascend', return_value=False), \