enginex-mlu370-vllm/vllm-v0.6.2/vllm/v1/engine/__init__.py

import enum
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

import msgspec

from vllm.lora.request import LoRARequest
from vllm.multimodal import MultiModalDataDict, MultiModalPlaceholderDict
from vllm.sampling_params import RequestOutputKind, SamplingParams


@dataclass
class DetokenizerRequest:

    request_id: str
    prompt: Optional[str]
    prompt_token_ids: List[int]
    skip_special_tokens: bool
    spaces_between_special_tokens: bool
    output_kind: RequestOutputKind

    stop: List[str]
    include_stop_str_in_output: bool


@dataclass
class EngineCoreRequest:

    # NOTE: prompt and prompt_token_ids should be DecoderOnlyInput,
    # but this object is currently not playing well with msgspec
    # due to circular imports and typing we have in data.py

    request_id: str
    #NOTE(Nick): I don't think we need to pass prompt here since it should
    # always be tokenized?
    prompt: Optional[str]
    prompt_token_ids: List[int]
    mm_data: Optional[MultiModalDataDict]
    mm_placeholders: Optional[MultiModalPlaceholderDict]
    mm_processor_kwargs: Optional[Dict[str, Any]]
    sampling_params: SamplingParams
    eos_token_id: Optional[int]
    arrival_time: float
    lora_request: Optional[LoRARequest]


class EngineCoreOutput(msgspec.Struct,
                       array_like=True,
                       omit_defaults=True,
                       gc=False):

    request_id: str
    new_token_ids: List[int]
    finished: bool
    finish_reason: Optional[str] = None
    stop_reason: Union[int, str, None] = None


class EngineCoreOutputs(msgspec.Struct,
                        array_like=True,
                        omit_defaults=True,
                        gc=False):

    #NOTE(Nick): We could consider ways to make this more compact,
    # e.g. columnwise layout and using an int enum for finish/stop reason

    # [num_reqs]
    outputs: List[EngineCoreOutput]


class EngineCoreRequestType(enum.Enum):
    """
    Request types defined as hex byte strings, so it can be sent over sockets
    without separate encoding step.
    """
    ADD = b'\x00'
    ABORT = b'\x01'