Support v0.10.1 (#2584)

### What this PR does / why we need it?
This patch also supports vLLM v0.10.1, in addition to v0.10.1.1.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
- CI passed
- test 0.10.1: https://github.com/vllm-project/vllm-ascend/pull/2583

- vLLM version: v0.10.1.1
- vLLM main: 321938e9ac

Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
2025-08-28 18:47:53 +08:00
parent 6c973361fc
commit 175f6bc445
8 changed files with 40 additions and 38 deletions
--- a/tests/ut/core/test_scheduler.py
+++ b/tests/ut/core/test_scheduler.py
@@ -21,7 +21,7 @@ from tests.ut.base import TestBase
 from vllm_ascend.core.scheduler import AscendScheduler
 from vllm_ascend.utils import vllm_version_is

-if not vllm_version_is("0.10.1.1"):
+if not (vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1")):
    from vllm.v1.outputs import DraftTokenIds
 else:
    DraftTokenIds = None
@@ -78,7 +78,7 @@ def make_output(scheduler):
    }
    sampled_token_ids = [[1000]] * len(scheduler.running)
    logprobs = None
-    if vllm_version_is("0.10.1.1"):
+    if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
        modelrunner_output = ModelRunnerOutput(
            req_ids=req_ids,
            req_id_to_index=req_id_to_index,
@@ -297,7 +297,7 @@ class TestAscendScheduler(TestBase):
            scheduler.running.append(req)
            req.status = RequestStatus.RUNNING

-        if vllm_version_is("0.10.1.1"):
+        if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
            scheduler_output = SchedulerOutput(
                scheduled_new_reqs=[],
                scheduled_cached_reqs=[],
@@ -384,7 +384,7 @@ class TestAscendScheduler(TestBase):
            scheduler.running.append(req)
            req.status = RequestStatus.RUNNING

-        if vllm_version_is("0.10.1.1"):
+        if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
            scheduler_output = SchedulerOutput(
                scheduled_new_reqs=[],
                scheduled_cached_reqs=[],
@@ -468,7 +468,7 @@ class TestAscendScheduler(TestBase):
            scheduler.running.append(req)
            req.status = RequestStatus.RUNNING

-        if vllm_version_is("0.10.1.1"):
+        if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
            scheduler_output = SchedulerOutput(
                scheduled_new_reqs=[],
                scheduled_cached_reqs=[],
@@ -549,7 +549,7 @@ class TestAscendScheduler(TestBase):
        scheduler.requests[requests[0].request_id] = requests[0]
        scheduler.running.append(requests[0])

-        if vllm_version_is("0.10.1.1"):
+        if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
            scheduler_output = SchedulerOutput(
                scheduled_new_reqs=[],
                scheduled_cached_reqs=[],
@@ -645,7 +645,7 @@ class TestAscendScheduler(TestBase):
                512)

            # Model output of the first request.
-            if vllm_version_is("0.10.1.1"):
+            if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
                model_runner_output = ModelRunnerOutput(
                    req_ids=[requests[0].request_id],
                    req_id_to_index={requests[0].request_id: 0},
@@ -671,7 +671,7 @@ class TestAscendScheduler(TestBase):
            # request is still running.
            scheduler.schedule()
            # Model output of the second request.
-            if vllm_version_is("0.10.1.1"):
+            if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
                model_runner_output = ModelRunnerOutput(
                    req_ids=[requests[1].request_id],
                    req_id_to_index={requests[1].request_id: 0},
@@ -739,7 +739,7 @@ class TestAscendScheduler(TestBase):
                req_id = requests[i].request_id
                self.assertEqual(output.num_scheduled_tokens[req_id], 1)
                self.assertNotIn(req_id, output.scheduled_spec_decode_tokens)
-            if vllm_version_is("0.10.1.1"):
+            if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
                model_runner_output = ModelRunnerOutput(
                    req_ids=req_ids,
                    req_id_to_index=req_to_index,
@@ -760,7 +760,7 @@ class TestAscendScheduler(TestBase):

            engine_core_outputs = scheduler.update_from_output(
                output, model_runner_output)
-            if not vllm_version_is("0.10.1.1"):
+            if not (vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1")):
                scheduler.update_draft_token_ids(draft_token_ids)

            for i in range(len(requests)):
@@ -797,7 +797,7 @@ class TestAscendScheduler(TestBase):
                else:
                    self.assertNotIn(req_id,
                                     output.scheduled_spec_decode_tokens)
-            if vllm_version_is("0.10.1.1"):
+            if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
                model_runner_output = ModelRunnerOutput(
                    req_ids=req_ids,
                    req_id_to_index=req_to_index,
--- a/tests/ut/kv_connector/utils.py
+++ b/tests/ut/kv_connector/utils.py
@@ -200,7 +200,7 @@ def create_model_runner_output(
    kv_connector_output = KVConnectorOutput(finished_sending=finished_sending,
                                            finished_recving=finished_recving)
    extra_args = {"kv_connector_output": kv_connector_output}
-    if vllm_version_is("0.10.1.1"):
+    if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
        model_runner_output = ModelRunnerOutput(
            req_ids=req_ids,
            req_id_to_index=req_id_to_index,