[CI] Upgrade vLLM version (#3139)
Upgrade vLLM version to the newest commit. - Fix the breaking change introduced by 969b4da3a6 - Add a patch to quick-fix torchair de94289a98 - Fix the UT error introduced by de94289a98 Close: https://github.com/vllm-project/vllm-ascend/issues/3138 - vLLM version: v0.10.2 - vLLM main: f225ea7dd9 --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -28,6 +28,20 @@ from vllm_ascend.worker.worker_v1 import NPUWorker
|
||||
class NPUTorchairWorker(NPUWorker):
|
||||
"""Torchair worker bases on NPUWorker. Only torchair specified code should be added in this class."""
|
||||
|
||||
def __init__(self,
             vllm_config,
             local_rank,
             rank,
             distributed_init_method,
             is_driver_worker=False,
             **kwargs):
    """Initialize the worker, then drop one entry from vLLM's V2 loader list.

    All arguments are forwarded unchanged to the parent ``__init__``.
    Afterwards ``"UnquantizedLinearMethod"`` is removed from vLLM's
    ``WEIGHT_LOADER_V2_SUPPORTED`` collection if it is present.

    Args:
        vllm_config: Forwarded to the parent constructor.
        local_rank: Forwarded to the parent constructor.
        rank: Forwarded to the parent constructor.
        distributed_init_method: Forwarded to the parent constructor.
        is_driver_worker: Forwarded to the parent constructor
            (defaults to ``False``).
        **kwargs: Extra keyword arguments forwarded to the parent
            constructor.
    """
    super().__init__(
        vllm_config,
        local_rank,
        rank,
        distributed_init_method,
        is_driver_worker,
        **kwargs,
    )

    # Function-scope import: vLLM's linear module is only loaded once a
    # torchair worker is actually constructed.
    from vllm.model_executor.layers import linear as vllm_linear

    # NOTE(review): presumably the torchair "quick fix" patch named in the
    # commit message -- confirm why the V2 weight loader must be disabled
    # for unquantized linear layers.
    # The imported object is mutated in place, so the removal is visible to
    # every other user of that vLLM module attribute in this process.
    v2_supported = vllm_linear.WEIGHT_LOADER_V2_SUPPORTED
    method_name = "UnquantizedLinearMethod"
    if method_name in v2_supported:
        v2_supported.remove(method_name)
|
||||
|
||||
def determine_available_memory(self) -> int:
|
||||
"""Override determine_available_memory to use cached torchair kv_cache_bytes."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user