[CI] Upgrade vLLM version (#3139)

Upgrade vLLM version to the newest commit. - Fix the breaking change introduced by 969b4da3a6 - Add a patch as a quick fix for torchair (de94289a98) - Fix the UT error introduced by de94289a98 Close: https://github.com/vllm-project/vllm-ascend/issues/3138 - vLLM version: v0.10.2 - vLLM main: f225ea7dd9 --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Co-authored-by: MengqingCao <cmq0113@163.com>
2025-09-25 07:36:51 +08:00
parent 464270e4ca
commit a055183821
9 changed files with 105 additions and 15 deletions
--- a/vllm_ascend/torchair/torchair_worker.py
+++ b/vllm_ascend/torchair/torchair_worker.py
@@ -28,6 +28,20 @@ from vllm_ascend.worker.worker_v1 import NPUWorker
 class NPUTorchairWorker(NPUWorker):
    """Torchair worker bases on NPUWorker. Only torchair specified code should be added in this class."""

+    def __init__(self,  # Build the parent worker, then drop "UnquantizedLinearMethod" from vLLM's WEIGHT_LOADER_V2_SUPPORTED.
+                 vllm_config,
+                 local_rank,
+                 rank,
+                 distributed_init_method,
+                 is_driver_worker=False,
+                 **kwargs):  # Extra keyword arguments are forwarded unchanged to the parent constructor.
+        super().__init__(vllm_config, local_rank, rank,
+                         distributed_init_method, is_driver_worker, **kwargs)  # is_driver_worker is passed positionally.
+        from vllm.model_executor.layers.linear import \
+            WEIGHT_LOADER_V2_SUPPORTED  # Function-local import: resolved on each construction, not at module import time.
+        if "UnquantizedLinearMethod" in WEIGHT_LOADER_V2_SUPPORTED:  # Guard so a repeated construction does not raise once the entry is gone.
+            WEIGHT_LOADER_V2_SUPPORTED.remove("UnquantizedLinearMethod")  # In-place mutation of vLLM's own object, so every user of it sees the change. NOTE(review): presumably a temporary torchair workaround - confirm when it can be dropped.
+
    def determine_available_memory(self) -> int:
        """Override determine_available_memory to use cached torchair kv_cache_bytes."""