vllm-ascend: support Ascend950 with Qwen dense models (#4228)
### What this PR does / why we need it?
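Adds support in vllm-ascend for running Qwen dense models on Ascend950. In the NPU worker, `_register_atb_extensions()` and the ATB warm-up (`_warm_up_atb()`) are now skipped when `get_ascend_device_type()` returns `AscendDeviceType._910_95`.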
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: wangyao <iwangyao@outlook.com>
Co-authored-by: weijinqian0 <1184188277@qq.com>
This commit is contained in:
@@ -52,8 +52,9 @@ from vllm_ascend.device_allocator.camem import CaMemAllocator
|
||||
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
||||
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
|
||||
from vllm_ascend.platform import NPUPlatform
|
||||
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
|
||||
is_enable_nz, register_ascend_customop)
|
||||
from vllm_ascend.utils import (AscendDeviceType, check_ascend_device_type,
|
||||
enable_sp, get_ascend_device_type, is_enable_nz,
|
||||
register_ascend_customop)
|
||||
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
|
||||
|
||||
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
|
||||
@@ -87,7 +88,8 @@ class NPUWorker(WorkerBase):
|
||||
# Register ops when worker init.
|
||||
from vllm_ascend import ops
|
||||
ops.register_dummy_fusion_op()
|
||||
_register_atb_extensions()
|
||||
if get_ascend_device_type() != AscendDeviceType._910_95:
|
||||
_register_atb_extensions()
|
||||
register_ascend_customop(vllm_config)
|
||||
# init ascend config and soc version
|
||||
init_ascend_config(vllm_config)
|
||||
@@ -356,7 +358,8 @@ class NPUWorker(WorkerBase):
|
||||
self.model_runner.capture_model()
|
||||
# Call ATB matmul to warm up; otherwise, the first operation (ReshapeAndCache)
|
||||
# may cause performance degradation at runtime.
|
||||
self._warm_up_atb()
|
||||
if get_ascend_device_type() != AscendDeviceType._910_95:
|
||||
self._warm_up_atb()
|
||||
# Reset the seed to ensure that the random state is not affected by
|
||||
# the model initialization and profiling.
|
||||
NPUPlatform.seed_everything(self.model_config.seed)
|
||||
|
||||
Reference in New Issue
Block a user