[CORE]initial support for torchair with non-mla backend (#1506)

### What this PR does / why we need it? This PR adds support for torchair graph mode with the non-MLA attention backend on both 800IA2 and 300I Duo platforms. The main change is the addition of `attention_v1_torchair.py`, which implements the attention-related operations that torchair requires. ### Does this PR introduce _any_ user-facing change? Before this PR, vLLM-Ascend only allowed deepseek models to use torchair. Now it can also be used with pangu models. In addition, we add a supported-model list to control which model types can use torchair. ### How was this patch tested? We have tested it with PanguProMoE on both 800IA2 and 300I Duo platforms, and the model generates answers normally. --------- Signed-off-by: angazenn <zengyanjia@huawei.com> Signed-off-by: tianyitang <tangtianyi4@huawei.com> Co-authored-by: angazenn <zengyanjia@huawei.com> Co-authored-by: tianyitang <tangtianyi4@huawei.com>
2025-07-03 22:21:42 +08:00
parent 9fbd8017c0
commit a5f33590d3
19 changed files with 1130 additions and 84 deletions
--- a/vllm_ascend/ascend_config.py
+++ b/vllm_ascend/ascend_config.py
@@ -18,6 +18,15 @@ from typing import Optional
 import vllm.envs as envs
 from vllm.logger import logger

+TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
+
+
+def check_torchair_supported(model_type: str):
+    for supported_model in TORCHAIR_MODEL_LIST:
+        if supported_model in model_type.lower():
+            return True
+    return False
+

 class AscendConfig:
    """
@@ -141,10 +150,10 @@ def check_ascend_config(vllm_config, enforce_eager):
                # torchair_graph is supported for deepseek model only currently.
                if vllm_config.model_config:
                    model_type = vllm_config.model_config.hf_config.model_type
-                    if "deepseek" not in model_type:
+                    if not check_torchair_supported(model_type):
                        raise NotImplementedError(
-                            "Torchair graph mode only works with deepseek model."
-                        )
+                            "Torchair graph mode only works with following model types:"
+                            f"{TORCHAIR_MODEL_LIST}.")
            # aclgraph case
            else:
                # aclgraph doesn't work with deepseek model and only qwen model is well tested.