[CI] Upgrade vLLM version (#3139)

Upgrade vLLM version to the newest commit.
- Fix the break change introduced by
969b4da3a6
- Add a patch to quick fix torhcair
de94289a98
- fix the ut error introduced by
de94289a98

Close: https://github.com/vllm-project/vllm-ascend/issues/3138


- vLLM version: v0.10.2
- vLLM main:
f225ea7dd9

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: MengqingCao <cmq0113@163.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
wangxiyuan
2025-09-25 07:36:51 +08:00
committed by GitHub
parent 464270e4ca
commit a055183821
9 changed files with 105 additions and 15 deletions

View File

@@ -28,6 +28,20 @@ from vllm_ascend.worker.worker_v1 import NPUWorker
class NPUTorchairWorker(NPUWorker):
"""Torchair worker bases on NPUWorker. Only torchair specified code should be added in this class."""
def __init__(self,
vllm_config,
local_rank,
rank,
distributed_init_method,
is_driver_worker=False,
**kwargs):
super().__init__(vllm_config, local_rank, rank,
distributed_init_method, is_driver_worker, **kwargs)
from vllm.model_executor.layers.linear import \
WEIGHT_LOADER_V2_SUPPORTED
if "UnquantizedLinearMethod" in WEIGHT_LOADER_V2_SUPPORTED:
WEIGHT_LOADER_V2_SUPPORTED.remove("UnquantizedLinearMethod")
def determine_available_memory(self) -> int:
"""Override determine_available_memory to use cached torchair kv_cache_bytes."""