[Quickfix] update CachedRequestState as NewRequestData changed (#2367)

### What this PR does / why we need it?

1. Update `CachedRequestState` to match the `NewRequestData` changes made in https://github.com/vllm-project/vllm/pull/22570
2. Drop maintenance of vLLM v0.10.0 on the main branch

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

CI passed with existing tests.

- vLLM version: v0.10.0
- vLLM main: 92ff41abea

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
2025-08-15 07:35:27 +08:00
parent 2ad7e1251e
commit 61866b8ac6
18 changed files with 77 additions and 285 deletions
--- a/tests/ut/kv_connector/test_remote_decode_lifecycle.py
+++ b/tests/ut/kv_connector/test_remote_decode_lifecycle.py
@@ -25,7 +25,6 @@ from tests.ut.kv_connector.utils import (assert_scheduler_empty,
                                         create_model_runner_output,
                                         create_request, create_scheduler,
                                         create_vllm_config)
-from vllm_ascend.utils import vllm_version_is


 def test_basic_lifecycle():
@@ -103,13 +102,10 @@ def test_basic_lifecycle():

    # (3b): execute_model()
    model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-    if vllm_version_is("0.10.0"):
-        model_runner_output.finished_sending = [request_id]
-    else:
-        from vllm.v1.worker.kv_connector_model_runner_mixin import \
-            KVConnectorOutput  # type: ignore  # noqa
-        model_runner_output.kv_connector_output = KVConnectorOutput(
-            finished_sending=[request_id])
+    from vllm.v1.worker.kv_connector_model_runner_mixin import \
+        KVConnectorOutput  # type: ignore  # noqa
+    model_runner_output.kv_connector_output = KVConnectorOutput(
+        finished_sending=[request_id])

    # (3c): update_from_output()
    scheduler.update_from_output(scheduler_output, model_runner_output)
@@ -164,13 +160,10 @@ def test_prefix_cache_lifecycle():
    scheduler_output = scheduler.schedule()
    scheduler.schedule()
    model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-    if vllm_version_is("0.10.0"):
-        model_runner_output.finished_sending = [request_remote.request_id]
-    else:
-        from vllm.v1.worker.kv_connector_model_runner_mixin import \
-            KVConnectorOutput  # noqa
-        model_runner_output.kv_connector_output = KVConnectorOutput(
-            finished_sending=[request_remote.request_id])
+    from vllm.v1.worker.kv_connector_model_runner_mixin import \
+        KVConnectorOutput  # noqa
+    model_runner_output.kv_connector_output = KVConnectorOutput(
+        finished_sending=[request_remote.request_id])
    scheduler.update_from_output(scheduler_output, model_runner_output)
    _ = scheduler.schedule()
    assert_scheduler_empty(scheduler)