Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -5,8 +5,6 @@ from dataclasses import dataclass
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from vllm._bc_linter import bc_linter_include
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
@@ -29,7 +27,6 @@ else:
|
||||
Request = object
|
||||
|
||||
|
||||
@bc_linter_include
|
||||
@dataclass
|
||||
class NewRequestData:
|
||||
req_id: str
|
||||
@@ -109,7 +106,6 @@ class NewRequestData:
|
||||
)
|
||||
|
||||
|
||||
@bc_linter_include
|
||||
@dataclass
|
||||
class CachedRequestData:
|
||||
req_ids: list[str]
|
||||
@@ -179,7 +175,6 @@ class CachedRequestData:
|
||||
)
|
||||
|
||||
|
||||
@bc_linter_include
|
||||
@dataclass
|
||||
class SchedulerOutput:
|
||||
# list of the requests that are scheduled for the first time.
|
||||
@@ -217,6 +212,9 @@ class SchedulerOutput:
|
||||
# freed from the encoder cache.
|
||||
free_encoder_mm_hashes: list[str]
|
||||
|
||||
# Request IDs that are resumed from preemption in this step.
|
||||
scheduled_resumed_reqs: list[str] | None = None
|
||||
|
||||
# Request IDs that are preempted in this step.
|
||||
# Only used for v2 model runner.
|
||||
preempted_req_ids: set[str] | None = None
|
||||
@@ -238,6 +236,11 @@ class SchedulerOutput:
|
||||
# EC Cache Connector metadata
|
||||
ec_connector_metadata: ECConnectorMetadata | None = None
|
||||
|
||||
# Block IDs freshly allocated from the pool during this scheduling step.
|
||||
# The worker zeros the corresponding GPU memory before the blocks are used,
|
||||
# preventing stale NaN/data from corrupting attention or SSM computation.
|
||||
new_block_ids_to_zero: list[int] | None = None
|
||||
|
||||
@classmethod
|
||||
def make_empty(cls) -> "SchedulerOutput":
|
||||
return cls(
|
||||
|
||||
Reference in New Issue
Block a user