Support v0.10.1 (#2584)
### What this PR does / why we need it?
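This patch adds support for vLLM v0.10.1 alongside v0.10.1.1: every version check that previously matched only "0.10.1.1" now also accepts "0.10.1".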
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- CI passed
- test 0.10.1: https://github.com/vllm-project/vllm-ascend/pull/2583
- vLLM version: v0.10.1.1
- vLLM main: 321938e9ac
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
This commit is contained in:
@@ -21,7 +21,7 @@ from tests.ut.base import TestBase
|
||||
from vllm_ascend.core.scheduler import AscendScheduler
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if not vllm_version_is("0.10.1.1"):
|
||||
if not (vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1")):
|
||||
from vllm.v1.outputs import DraftTokenIds
|
||||
else:
|
||||
DraftTokenIds = None
|
||||
@@ -78,7 +78,7 @@ def make_output(scheduler):
|
||||
}
|
||||
sampled_token_ids = [[1000]] * len(scheduler.running)
|
||||
logprobs = None
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
modelrunner_output = ModelRunnerOutput(
|
||||
req_ids=req_ids,
|
||||
req_id_to_index=req_id_to_index,
|
||||
@@ -297,7 +297,7 @@ class TestAscendScheduler(TestBase):
|
||||
scheduler.running.append(req)
|
||||
req.status = RequestStatus.RUNNING
|
||||
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
scheduler_output = SchedulerOutput(
|
||||
scheduled_new_reqs=[],
|
||||
scheduled_cached_reqs=[],
|
||||
@@ -384,7 +384,7 @@ class TestAscendScheduler(TestBase):
|
||||
scheduler.running.append(req)
|
||||
req.status = RequestStatus.RUNNING
|
||||
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
scheduler_output = SchedulerOutput(
|
||||
scheduled_new_reqs=[],
|
||||
scheduled_cached_reqs=[],
|
||||
@@ -468,7 +468,7 @@ class TestAscendScheduler(TestBase):
|
||||
scheduler.running.append(req)
|
||||
req.status = RequestStatus.RUNNING
|
||||
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
scheduler_output = SchedulerOutput(
|
||||
scheduled_new_reqs=[],
|
||||
scheduled_cached_reqs=[],
|
||||
@@ -549,7 +549,7 @@ class TestAscendScheduler(TestBase):
|
||||
scheduler.requests[requests[0].request_id] = requests[0]
|
||||
scheduler.running.append(requests[0])
|
||||
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
scheduler_output = SchedulerOutput(
|
||||
scheduled_new_reqs=[],
|
||||
scheduled_cached_reqs=[],
|
||||
@@ -645,7 +645,7 @@ class TestAscendScheduler(TestBase):
|
||||
512)
|
||||
|
||||
# Model output of the first request.
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
model_runner_output = ModelRunnerOutput(
|
||||
req_ids=[requests[0].request_id],
|
||||
req_id_to_index={requests[0].request_id: 0},
|
||||
@@ -671,7 +671,7 @@ class TestAscendScheduler(TestBase):
|
||||
# request is still running.
|
||||
scheduler.schedule()
|
||||
# Model output of the second request.
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
model_runner_output = ModelRunnerOutput(
|
||||
req_ids=[requests[1].request_id],
|
||||
req_id_to_index={requests[1].request_id: 0},
|
||||
@@ -739,7 +739,7 @@ class TestAscendScheduler(TestBase):
|
||||
req_id = requests[i].request_id
|
||||
self.assertEqual(output.num_scheduled_tokens[req_id], 1)
|
||||
self.assertNotIn(req_id, output.scheduled_spec_decode_tokens)
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
model_runner_output = ModelRunnerOutput(
|
||||
req_ids=req_ids,
|
||||
req_id_to_index=req_to_index,
|
||||
@@ -760,7 +760,7 @@ class TestAscendScheduler(TestBase):
|
||||
|
||||
engine_core_outputs = scheduler.update_from_output(
|
||||
output, model_runner_output)
|
||||
if not vllm_version_is("0.10.1.1"):
|
||||
if not (vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1")):
|
||||
scheduler.update_draft_token_ids(draft_token_ids)
|
||||
|
||||
for i in range(len(requests)):
|
||||
@@ -797,7 +797,7 @@ class TestAscendScheduler(TestBase):
|
||||
else:
|
||||
self.assertNotIn(req_id,
|
||||
output.scheduled_spec_decode_tokens)
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
model_runner_output = ModelRunnerOutput(
|
||||
req_ids=req_ids,
|
||||
req_id_to_index=req_to_index,
|
||||
|
||||
@@ -200,7 +200,7 @@ def create_model_runner_output(
|
||||
kv_connector_output = KVConnectorOutput(finished_sending=finished_sending,
|
||||
finished_recving=finished_recving)
|
||||
extra_args = {"kv_connector_output": kv_connector_output}
|
||||
if vllm_version_is("0.10.1.1"):
|
||||
if vllm_version_is("0.10.1.1") or vllm_version_is("0.10.1"):
|
||||
model_runner_output = ModelRunnerOutput(
|
||||
req_ids=req_ids,
|
||||
req_id_to_index=req_id_to_index,
|
||||
|
||||
Reference in New Issue
Block a user