[CI] Upgrade vllm to newest commit (#3182)

### What this PR does / why we need it? Upgrade vLLM to the newest commit - Fix the problem that aclgraph doesn't work, caused by 24fab45d96 - Fix the PoolerOutput import error, caused by 755ed7b05b - Fix the aclgraph weight load error to stay consistent with the torchair fix. 4492e3a554 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? All tests should pass - vLLM version: v0.10.2 - vLLM main: 52d0cb8458 --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-09-26 06:18:15 +08:00
parent 0794f64a18
commit 2930e4a6bd
9 changed files with 49 additions and 53 deletions
--- a/vllm_ascend/torchair/torchair_worker.py
+++ b/vllm_ascend/torchair/torchair_worker.py
@@ -28,20 +28,6 @@ from vllm_ascend.worker.worker_v1 import NPUWorker
 class NPUTorchairWorker(NPUWorker):
    """Torchair worker bases on NPUWorker. Only torchair specified code should be added in this class."""

-    def __init__(self,
-                 vllm_config,
-                 local_rank,
-                 rank,
-                 distributed_init_method,
-                 is_driver_worker=False,
-                 **kwargs):
-        super().__init__(vllm_config, local_rank, rank,
-                         distributed_init_method, is_driver_worker, **kwargs)
-        from vllm.model_executor.layers.linear import \
-            WEIGHT_LOADER_V2_SUPPORTED
-        if "UnquantizedLinearMethod" in WEIGHT_LOADER_V2_SUPPORTED:
-            WEIGHT_LOADER_V2_SUPPORTED.remove("UnquantizedLinearMethod")
-
    def determine_available_memory(self) -> int:
        """Override determine_available_memory to use cached torchair kv_cache_bytes."""