[Feature]: Support running qwen2.5/3 dense and qwen2.5vl models on 310P devices (#5776)

### What this PR does / why we need it?
Add basic 310P support. Currently, only dense models are supported, and only in eager mode.

- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef

---------

Signed-off-by: Tflowers-0129 <2906339855@qq.com>
Signed-off-by: Shaoxu Cheng <2906339855@qq.com>
This commit is contained in:
Shaoxu Cheng
2026-01-17 11:49:18 +08:00
committed by GitHub
parent 7feb74590b
commit 1ffca8673f
17 changed files with 682 additions and 23 deletions

View File

@@ -74,6 +74,10 @@ _GRAPH_PRINT_STREAM_LOCK = Lock()
_HAS_ROPE = None
def is_310p():
    """Return True when the current Ascend device is the 310P variant."""
    device_type = get_ascend_device_type()
    return device_type == AscendDeviceType._310P
def _print_callback_on_stream(*args):
"""Callback function to print arguments on the dedicated print stream."""
global _GRAPH_PRINT_STREAM
@@ -713,6 +717,22 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
"ApplyRotaryEmb": AscendApplyRotaryEmb,
}
# 310P: override selected ops with 310P implementations (keep minimal changes outside _310p)
if is_310p():
from vllm_ascend._310p.ops.activation import AscendSiluAndMul310
from vllm_ascend._310p.ops.mm_encoder_attention import AscendMMEncoderAttention310
from vllm_ascend._310p.ops.rotary_embedding import (
AscendMRotaryEmbedding310,
)
REGISTERED_ASCEND_OPS.update(
{
"SiluAndMul": AscendSiluAndMul310,
"MMEncoderAttention": AscendMMEncoderAttention310,
"MRotaryEmbedding": AscendMRotaryEmbedding310,
}
)
for name, op_cls in REGISTERED_ASCEND_OPS.items():
CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)