[refactor] refactor deepseek-related files (#2849)

### What this PR does / why we need it? This PR deletes ~2K lines of DeepSeek modeling code. It makes the CustomDeepseekV2 modules fall back to the original vLLM implementations and adapts to some upstream vLLM changes related to DeepSeek and MoE. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? E2E vLLM serving with torchair graph mode and eager mode. - vLLM version: v0.10.2 - vLLM main: 759ef49b15 --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com> Co-authored-by: yiz-liu <136800916+yiz-liu@users.noreply.github.com> Co-authored-by: Yizhou Liu <liu_yizhou@outlook.com>
2025-09-16 14:13:07 +08:00
parent 18ca7861f6
commit 1c5900327b
18 changed files with 295 additions and 1899 deletions
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -238,7 +238,7 @@ class NPUPlatform(Platform):
            compilation_config.level = CompilationLevel.NO_COMPILATION

        if parallel_config and parallel_config.worker_cls == "auto":
-            if ascend_config.torchair_graph_config.enabled:
+            if ascend_config.torchair_graph_config.enabled or ascend_config.enable_shared_expert_dp:
                parallel_config.worker_cls = "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker"
            else:
                parallel_config.worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
@@ -289,7 +289,12 @@ class NPUPlatform(Platform):
        if not use_v1:
            raise ValueError("vLLM Ascend does not support V0 engine.")

-        use_torchair = get_ascend_config().torchair_graph_config.enabled
+        ascend_config = get_ascend_config()
+
+        if use_mla and ascend_config.enable_shared_expert_dp:
+            return "vllm_ascend.torchair.torchair_mla.AscendMLATorchairBackend"
+
+        use_torchair = ascend_config.torchair_graph_config.enabled
        # choose attention backend based on use_mla and use_torchair
        backend_map = {
            (True, True):