forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3
This commit is contained in:
77
vllm-v0.6.2/vllm/v1/engine/__init__.py
Normal file
77
vllm-v0.6.2/vllm/v1/engine/__init__.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import enum
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import msgspec
|
||||
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal import MultiModalDataDict, MultiModalPlaceholderDict
|
||||
from vllm.sampling_params import RequestOutputKind, SamplingParams
|
||||
|
||||
|
||||
@dataclass
class DetokenizerRequest:
    """Plain dataclass carrying the per-request fields declared below.

    Every field is required (no defaults). Because this is a regular
    ``@dataclass``, the generated ``__init__`` accepts the fields
    positionally in declaration order, so the order must not change.

    NOTE(review): judging by the name, this is the payload handed to the
    detokenizer for one request; the consumer is not visible in this file.
    """

    request_id: str
    prompt: Optional[str]
    prompt_token_ids: List[int]
    skip_special_tokens: bool
    spaces_between_special_tokens: bool
    output_kind: RequestOutputKind

    # Stop-string settings (presumably mirrored from the sampling
    # parameters of the request -- confirm against the caller).
    stop: List[str]
    include_stop_str_in_output: bool
|
||||
|
||||
|
||||
@dataclass
class EngineCoreRequest:
    """Plain dataclass carrying the per-request fields declared below.

    Every field is required (no defaults). Because this is a regular
    ``@dataclass``, the generated ``__init__`` accepts the fields
    positionally in declaration order, so the order must not change.

    NOTE(review): judging by the name, this is the request object passed
    to the engine core; the consumer is not visible in this file.
    """

    # NOTE: prompt and prompt_token_ids should be DecoderOnlyInput,
    # but this object is currently not playing well with msgspec
    # due to circular imports and typing we have in data.py

    request_id: str
    # NOTE(Nick): I don't think we need to pass prompt here since it should
    # always be tokenized?
    prompt: Optional[str]
    prompt_token_ids: List[int]
    mm_data: Optional[MultiModalDataDict]
    mm_placeholders: Optional[MultiModalPlaceholderDict]
    mm_processor_kwargs: Optional[Dict[str, Any]]
    sampling_params: SamplingParams
    eos_token_id: Optional[int]
    arrival_time: float
    lora_request: Optional[LoRARequest]
|
||||
|
||||
|
||||
class EngineCoreOutput(msgspec.Struct,
                       array_like=True,
                       omit_defaults=True,
                       gc=False):
    """msgspec struct holding the output fields for a single request.

    Struct options used (see the msgspec ``Struct`` documentation):

    * ``array_like=True``   -- instances are encoded as arrays, not maps.
    * ``omit_defaults=True`` -- fields equal to their default are left out
      of the encoded message.
    * ``gc=False``          -- instances are not tracked by the garbage
      collector; they must not participate in reference cycles.

    With ``array_like=True`` the field order is part of the wire format,
    so the declaration order below must not change.
    """

    request_id: str
    new_token_ids: List[int]
    finished: bool
    # Both reasons default to None, so omit_defaults drops them from the
    # encoded message when they are unset.
    finish_reason: Optional[str] = None
    stop_reason: Union[int, str, None] = None
|
||||
|
||||
|
||||
class EngineCoreOutputs(msgspec.Struct,
                        array_like=True,
                        omit_defaults=True,
                        gc=False):
    """msgspec struct wrapping a list of ``EngineCoreOutput`` items.

    Uses the struct options ``array_like=True``, ``omit_defaults=True``
    and ``gc=False`` (see the msgspec ``Struct`` documentation for their
    exact encoding and garbage-collection semantics).
    """

    # NOTE(Nick): We could consider ways to make this more compact,
    # e.g. columnwise layout and using an int enum for finish/stop reason

    # [num_reqs]
    outputs: List[EngineCoreOutput]
|
||||
|
||||
|
||||
class EngineCoreRequestType(enum.Enum):
    """
    Request types defined as hex byte strings, so they can be sent over
    sockets without a separate encoding step.

    Each member's ``value`` is a single-byte ``bytes`` object; a member
    can be recovered from a received byte via
    ``EngineCoreRequestType(raw_byte)``.
    """
    ADD = b'\x00'
    ABORT = b'\x01'
|
||||
Reference in New Issue
Block a user