[Quickfix] update CachedRequestState as NewRequestData changed (#2367)

### What this PR does / why we need it?
1. Update `CachedRequestState` to match the changes made to `NewRequestData` in
https://github.com/vllm-project/vllm/pull/22570
2. Drop maintenance of vLLM v0.10.0 on the main branch

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with the existing tests.


- vLLM version: v0.10.0
- vLLM main:
92ff41abea

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
Mengqing Cao
2025-08-15 07:35:27 +08:00
committed by GitHub
parent 2ad7e1251e
commit 61866b8ac6
18 changed files with 77 additions and 285 deletions

View File

@@ -34,6 +34,7 @@ from vllm.distributed.parallel_state import get_pp_group, get_tp_group
from vllm.logger import logger
from vllm.lora.request import LoRARequest
from vllm.sequence import IntermediateTensors
from vllm.tasks import SupportedTask
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, GiB_bytes
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
@@ -45,12 +46,9 @@ from vllm_ascend.device_allocator.camem import CaMemAllocator
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
from vllm_ascend.platform import NPUPlatform
from vllm_ascend.utils import (init_ascend_soc_version, sleep_mode_enabled,
try_register_lib, vllm_version_is)
try_register_lib)
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
if not vllm_version_is("0.10.0"):
from vllm.tasks import SupportedTask
class NPUWorker(WorkerBase):
@@ -209,26 +207,15 @@ class NPUWorker(WorkerBase):
if not has_kv_transfer_group():
return None
is_legacy = vllm_version_is("0.10.0")
if is_legacy:
finished_sending = output.finished_sending
finished_recving = output.finished_recving
else:
kv_connector_output = output.kv_connector_output
finished_sending = kv_connector_output.finished_sending
finished_recving = kv_connector_output.finished_recving
kv_connector_output = output.kv_connector_output
finished_sending = kv_connector_output.finished_sending
finished_recving = kv_connector_output.finished_recving
if not finished_sending and not finished_recving:
return EMPTY_MODEL_RUNNER_OUTPUT
new_output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
if is_legacy:
new_output.finished_sending = finished_sending
new_output.finished_recving = finished_recving
else:
new_output.kv_connector_output = kv_connector_output
new_output.kv_connector_output = kv_connector_output
return new_output
assert isinstance(output, ModelRunnerOutput)