[Feature]: Support running qwen2.5/3 dense and qwen2.5vl models on 310P devices (#5776)

### What this PR does / why we need it?
Add basic 310P support. Currently, only dense models are supported, and only in eager mode.

- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef

---------

Signed-off-by: Tflowers-0129 <2906339855@qq.com>
Signed-off-by: Shaoxu Cheng <2906339855@qq.com>
This commit is contained in:
Shaoxu Cheng
2026-01-17 11:49:18 +08:00
committed by GitHub
parent 7feb74590b
commit 1ffca8673f
17 changed files with 682 additions and 23 deletions

View File

@@ -74,6 +74,10 @@ _GRAPH_PRINT_STREAM_LOCK = Lock()
_HAS_ROPE = None
def is_310p():
    """Return True when the current Ascend device is the 310P variant."""
    device_type = get_ascend_device_type()
    return device_type == AscendDeviceType._310P
def _print_callback_on_stream(*args):
"""Callback function to print arguments on the dedicated print stream."""
global _GRAPH_PRINT_STREAM
@@ -713,6 +717,22 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
"ApplyRotaryEmb": AscendApplyRotaryEmb,
}
# 310P: override selected ops with 310P implementations (keep minimal changes outside _310p)
if is_310p():
from vllm_ascend._310p.ops.activation import AscendSiluAndMul310
from vllm_ascend._310p.ops.mm_encoder_attention import AscendMMEncoderAttention310
from vllm_ascend._310p.ops.rotary_embedding import (
AscendMRotaryEmbedding310,
)
REGISTERED_ASCEND_OPS.update(
{
"SiluAndMul": AscendSiluAndMul310,
"MMEncoderAttention": AscendMMEncoderAttention310,
"MRotaryEmbedding": AscendMRotaryEmbedding310,
}
)
for name, op_cls in REGISTERED_ASCEND_OPS.items():
CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)