[Quickfix] update CachedRequestState as NewRequestData changed (#2367)

### What this PR does / why we need it?
1. Update `CachedRequestState` to match the `NewRequestData` changes made in https://github.com/vllm-project/vllm/pull/22570
2. Stop maintaining compatibility with vLLM v0.10.0 on the main branch

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with the existing tests.

- vLLM version: v0.10.0
- vLLM main: 92ff41abea

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
2025-08-15 07:35:27 +08:00
parent 2ad7e1251e
commit 61866b8ac6
18 changed files with 77 additions and 285 deletions
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -34,6 +34,7 @@ from vllm.distributed.parallel_state import get_pp_group, get_tp_group
 from vllm.logger import logger
 from vllm.lora.request import LoRARequest
 from vllm.sequence import IntermediateTensors
+from vllm.tasks import SupportedTask
 from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, GiB_bytes
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
@@ -45,12 +46,9 @@ from vllm_ascend.device_allocator.camem import CaMemAllocator
 from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
 from vllm_ascend.platform import NPUPlatform
 from vllm_ascend.utils import (init_ascend_soc_version, sleep_mode_enabled,
-                               try_register_lib, vllm_version_is)
+                               try_register_lib)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner

-if not vllm_version_is("0.10.0"):
-    from vllm.tasks import SupportedTask
-

 class NPUWorker(WorkerBase):

@@ -209,26 +207,15 @@ class NPUWorker(WorkerBase):
            if not has_kv_transfer_group():
                return None

-            is_legacy = vllm_version_is("0.10.0")
-
-            if is_legacy:
-                finished_sending = output.finished_sending
-                finished_recving = output.finished_recving
-            else:
-                kv_connector_output = output.kv_connector_output
-                finished_sending = kv_connector_output.finished_sending
-                finished_recving = kv_connector_output.finished_recving
+            kv_connector_output = output.kv_connector_output
+            finished_sending = kv_connector_output.finished_sending
+            finished_recving = kv_connector_output.finished_recving

            if not finished_sending and not finished_recving:
                return EMPTY_MODEL_RUNNER_OUTPUT

            new_output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
-
-            if is_legacy:
-                new_output.finished_sending = finished_sending
-                new_output.finished_recving = finished_recving
-            else:
-                new_output.kv_connector_output = kv_connector_output
+            new_output.kv_connector_output = kv_connector_output
            return new_output

        assert isinstance(output, ModelRunnerOutput)