vllm-ascend: support Ascend950 with Qwen dense models (#4228)

### What this PR does / why we need it?
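This PR adds support in vllm-ascend for running Qwen dense models on Ascend950. In `NPUWorker`, ATB extension registration and the ATB warm-up call are now skipped when the device type is `AscendDeviceType._910_95`.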
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: wangyao <iwangyao@outlook.com>
Co-authored-by: weijinqian0 <1184188277@qq.com>
This commit is contained in:
wangyao-i
2025-12-12 15:50:57 +08:00
committed by GitHub
parent 716c4dacfe
commit 0983c5510a
4 changed files with 59 additions and 12 deletions

View File

@@ -52,8 +52,9 @@ from vllm_ascend.device_allocator.camem import CaMemAllocator
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
from vllm_ascend.platform import NPUPlatform
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
is_enable_nz, register_ascend_customop)
from vllm_ascend.utils import (AscendDeviceType, check_ascend_device_type,
enable_sp, get_ascend_device_type, is_enable_nz,
register_ascend_customop)
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
@@ -87,7 +88,8 @@ class NPUWorker(WorkerBase):
# Register ops when worker init.
from vllm_ascend import ops
ops.register_dummy_fusion_op()
_register_atb_extensions()
if get_ascend_device_type() != AscendDeviceType._910_95:
_register_atb_extensions()
register_ascend_customop(vllm_config)
# init ascend config and soc version
init_ascend_config(vllm_config)
@@ -356,7 +358,8 @@ class NPUWorker(WorkerBase):
self.model_runner.capture_model()
# Call ATB matmul to warm up; otherwise, the first operation (ReshapeAndCache)
# may cause performance degradation at runtime.
self._warm_up_atb()
if get_ascend_device_type() != AscendDeviceType._910_95:
self._warm_up_atb()
# Reset the seed to ensure that the random state is not affected by
# the model initialization and profiling.
NPUPlatform.seed_everything(self.model_config.seed)