Revert "[Perf][1/N] w8a8c8 support in dsv3.2/glm5 (#7029)" (#7288)

### What this PR does / why we need it? This reverts commit 7ed9e9de69, which introduces an issue that the patch doesn't work with recompute scheduler enabled. - vLLM version: v0.17.0 - vLLM main: 4034c3d32e --------- Signed-off-by: MengqingCao <cmq0113@163.com>
2026-03-15 20:19:09 +08:00
parent 29f195a91c
commit 0c299f79b9
24 changed files with 79 additions and 4281 deletions
--- a/vllm_ascend/ascend_config.py
+++ b/vllm_ascend/ascend_config.py
@@ -134,12 +134,9 @@ class AscendConfig:
            bool(additional_config.get("enable_async_exponential", False)) and not vllm_is_batch_invariant()
        )

-        use_sparse = hasattr(vllm_config.model_config, "hf_text_config") and hasattr(
-            vllm_config.model_config.hf_text_config, "index_topk"
-        )
-
        self.enable_kv_nz = additional_config.get("enable_kv_nz", False)
        if self.enable_kv_nz:
+            use_sparse = hasattr(vllm_config.model_config.hf_text_config, "index_topk")
            if not vllm_config.model_config.is_deepseek_mla or use_sparse:
                raise RuntimeError("enable_kv_nz is only supported for mla currently.")
            if vllm_config.kv_transfer_config is None or not vllm_config.kv_transfer_config.is_kv_consumer:
@@ -147,17 +144,6 @@ class AscendConfig:
                    "enable_kv_nz is only supported in pd scenario and can only be used in D node."
                )

-        from vllm_ascend.utils import AscendDeviceType, get_ascend_device_type
-
-        # Disable Sparse C8 for A5
-        # A5 has not been fully validated for this path and may carry hidden risks.
-        # TODO(rjg-lyh): Enable A5 support after sufficient validation.
-        self.enable_sparse_c8 = (
-            additional_config.get("enable_sparse_c8", False)
-            and use_sparse
-            and get_ascend_device_type() != AscendDeviceType.A5
-        )
-
    def _construct_weight_prefetch_config(self, additional_config):
        weight_prefetch_config = additional_config.get("weight_prefetch_config", {})
        self.weight_prefetch_config = WeightPrefetchConfig(weight_prefetch_config)