Upgrade to new vllm commit (#3719)

### What this PR does / why we need it?
Upgrade to the new vLLM commit:
c9461e05a4

- Fix many imports broken by
https://github.com/vllm-project/vllm/pull/26908
- Fix the ```sha256``` import (a version-gated import sketch follows this list), broken by
https://github.com/vllm-project/vllm/pull/27169
- Remove ```SchedulerConfig.send_delta_data```, following
https://github.com/vllm-project/vllm/pull/27142
- Fix ```FusedMoE``` for the dual-stream execution introduced by
https://github.com/vllm-project/vllm/pull/26440
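
For the ```sha256``` item above, a minimal sketch of the version-gated import pattern this upgrade relies on is shown below. The new module path ```vllm.utils.hashing``` is an assumption inferred from https://github.com/vllm-project/vllm/pull/27169 rather than something stated in this PR; ```vllm_version_is``` is the existing helper from ```vllm_ascend.utils```.

```python
# Minimal sketch (not the exact patch): gate the import on the installed
# vLLM version so both the pinned release and the new commit work.
from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
    # v0.11.0 still exports sha256 from the flat utils module.
    from vllm.utils import sha256
else:
    # Assumed new location after vllm-project/vllm#27169.
    from vllm.utils.hashing import sha256
```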

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with newly added and existing tests.


- vLLM version: v0.11.0rc3
- vLLM main:
17c540a993

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
Signed-off-by: Icey <1790571317@qq.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
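The key hunks, apparently from the worker entry point (the ```NPUWorker``` and ```NPUModelRunner``` references suggest ```vllm_ascend/worker/worker_v1.py```), are: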

```diff
@@ -35,7 +35,6 @@ from vllm.logger import logger
 from vllm.lora.request import LoRARequest
 from vllm.sequence import IntermediateTensors
 from vllm.tasks import SupportedTask
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, GiB_bytes
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
 from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput,
@@ -51,7 +50,7 @@ from vllm_ascend.platform import NPUPlatform
 from vllm_ascend.utils import (init_ascend_soc_version,
                                prefill_context_parallel_enable,
                                register_ascend_customop, sleep_mode_enabled,
-                               try_register_lib)
+                               try_register_lib, vllm_version_is)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
 torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
@@ -66,6 +65,12 @@ torch_non_c_binding_in_graph_functions_npu[
 torch._dynamo.trace_rules.torch_name_rule_map.append(
     torch_non_c_binding_in_graph_functions_npu) # noqa: E402
+if vllm_version_is("0.11.0"):
+    from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, GiB_bytes
+else:
+    from vllm.utils.mem_constants import GiB_bytes
+    from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 class NPUWorker(WorkerBase):
```
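
Binding ```GiB_bytes``` and ```STR_DTYPE_TO_TORCH_DTYPE``` through the ```vllm_version_is("0.11.0")``` gate keeps a single code path in the rest of the worker: callers use the same names regardless of whether the flat ```vllm.utils``` module (v0.11.0) or the split ```vllm.utils.mem_constants```/```vllm.utils.torch_utils``` modules (newer main) provide them.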