[Feature]: Support running Qwen2.5/Qwen3 dense and Qwen2.5-VL models on 310P devices (#5776)
### What this PR does / why we need it?
Add basic 310P support. For now, only dense models are supported, and only in eager mode.
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: Tflowers-0129 <2906339855@qq.com>
Signed-off-by: Shaoxu Cheng <2906339855@qq.com>
This commit is contained in:
@@ -74,6 +74,10 @@ _GRAPH_PRINT_STREAM_LOCK = Lock()
|
||||
_HAS_ROPE = None
|
||||
|
||||
|
||||
def is_310p():
    """Return whether the detected Ascend device type is 310P.

    The result is the comparison of ``get_ascend_device_type()`` against
    ``AscendDeviceType._310P``.
    """
    device_type = get_ascend_device_type()
    return device_type == AscendDeviceType._310P
|
||||
|
||||
|
||||
def _print_callback_on_stream(*args):
|
||||
"""Callback function to print arguments on the dedicated print stream."""
|
||||
global _GRAPH_PRINT_STREAM
|
||||
@@ -713,6 +717,22 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
|
||||
"ApplyRotaryEmb": AscendApplyRotaryEmb,
|
||||
}
|
||||
|
||||
# 310P: override selected ops with 310P implementations (keep minimal changes outside _310p)
|
||||
if is_310p():
|
||||
from vllm_ascend._310p.ops.activation import AscendSiluAndMul310
|
||||
from vllm_ascend._310p.ops.mm_encoder_attention import AscendMMEncoderAttention310
|
||||
from vllm_ascend._310p.ops.rotary_embedding import (
|
||||
AscendMRotaryEmbedding310,
|
||||
)
|
||||
|
||||
REGISTERED_ASCEND_OPS.update(
|
||||
{
|
||||
"SiluAndMul": AscendSiluAndMul310,
|
||||
"MMEncoderAttention": AscendMMEncoderAttention310,
|
||||
"MRotaryEmbedding": AscendMRotaryEmbedding310,
|
||||
}
|
||||
)
|
||||
|
||||
for name, op_cls in REGISTERED_ASCEND_OPS.items():
|
||||
CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user