Drop vLLM 0.13.0 support (#6069)

### What this PR does / why we need it?

Drop vLLM 0.13.0 support and upgrade to vLLM 0.14.0.

- vLLM version: v0.13.0
- vLLM main: d68209402d

---------

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
2026-01-23 09:45:08 +08:00
parent 27a513b672
commit 819a4459ce
39 changed files with 86 additions and 272 deletions
--- a/vllm_ascend/worker/worker.py
+++ b/vllm_ascend/worker/worker.py
@@ -58,16 +58,13 @@ from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
 from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
 from vllm_ascend.utils import (AscendDeviceType, check_ascend_device_type,
                               enable_sp, get_ascend_device_type,
-                               register_ascend_customop, vllm_version_is)
+                               register_ascend_customop)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner

 torch._dynamo.trace_rules.clear_lru_cache()  # noqa: E402
 from torch._dynamo.variables import TorchInGraphFunctionVariable  # noqa: E402

-if vllm_version_is("0.13.0"):
-    from vllm.model_executor.utils import set_random_seed
-else:
-    from vllm.utils.torch_utils import set_random_seed
+from vllm.utils.torch_utils import set_random_seed

 torch_non_c_binding_in_graph_functions_npu = dict.fromkeys(
    ["torch.npu.current_stream"],
@@ -121,13 +118,6 @@ class NPUWorker(WorkerBase):
            self.cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[
                self.cache_config.cache_dtype]

-        if vllm_version_is('0.13.0'):
-            if self.model_config.trust_remote_code:
-                # note: lazy import to avoid importing torch before initializing
-                from vllm.utils.import_utils import init_cached_hf_modules
-
-                init_cached_hf_modules()
-
        self.profiler = self._init_profiler()
        if vllm_config.model_config and vllm_config.model_config.enable_sleep_mode:
            # Buffers saved before sleep