Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -5,8 +5,6 @@ from dataclasses import dataclass
from functools import cached_property
from typing import TYPE_CHECKING
from vllm._bc_linter import bc_linter_include
if TYPE_CHECKING:
import numpy as np
import numpy.typing as npt
@@ -29,7 +27,6 @@ else:
Request = object
@bc_linter_include
@dataclass
class NewRequestData:
req_id: str
@@ -109,7 +106,6 @@ class NewRequestData:
)
@bc_linter_include
@dataclass
class CachedRequestData:
req_ids: list[str]
@@ -179,7 +175,6 @@ class CachedRequestData:
)
@bc_linter_include
@dataclass
class SchedulerOutput:
# list of the requests that are scheduled for the first time.
@@ -217,6 +212,9 @@ class SchedulerOutput:
# freed from the encoder cache.
free_encoder_mm_hashes: list[str]
# Request IDs that are resumed from preemption in this step.
scheduled_resumed_reqs: list[str] | None = None
# Request IDs that are preempted in this step.
# Only used by the v2 model runner.
preempted_req_ids: set[str] | None = None
@@ -238,6 +236,11 @@ class SchedulerOutput:
# EC Cache Connector metadata
ec_connector_metadata: ECConnectorMetadata | None = None
# Block IDs freshly allocated from the pool during this scheduling step.
# The worker zeros the corresponding GPU memory before the blocks are used,
# so that stale data (such as leftover NaNs) cannot corrupt attention or SSM
# computation.
new_block_ids_to_zero: list[int] | None = None
@classmethod
def make_empty(cls) -> "SchedulerOutput":
return cls(