vllm-ascend support chunked prefill (#1172)

### What this PR does / why we need it?

vllm-ascend supports chunked prefill for MLA.

---------

Signed-off-by: fems14 <1804143737@qq.com>
2025-06-14 22:31:16 +08:00
parent a3b5af8307
commit ab5d110fcc
5 changed files with 303 additions and 20 deletions
--- a/vllm_ascend/ascend_config.py
+++ b/vllm_ascend/ascend_config.py
@@ -39,6 +39,8 @@ class AscendConfig:
        self.expert_tensor_parallel_size = int(
            additional_config.get("expert_tensor_parallel_size", 0))
        self.expert_map_path = additional_config.get("expert_map_path", None)
+        self.chunked_prefill_for_mla = additional_config.get(
+            "chunked_prefill_for_mla", False)


 class TorchairGraphConfig: