feat: update multimodal data handling in engine entrypoint (#8002)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
@@ -8,7 +8,7 @@ from transformers.tokenization_utils_base import PreTrainedTokenizerBase
|
||||
from sglang.srt.managers.io_struct import (
|
||||
EmbeddingReqInput,
|
||||
GenerateReqInput,
|
||||
- ImageDataItem,
|
||||
+ ImageDataInputItem,
|
||||
)
|
||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||
from sglang.srt.models.vila import VILAForConditionalGeneration
|
||||
@@ -42,7 +42,7 @@ class VILAMultimodalProcessor(BaseMultimodalProcessor):
|
||||
|
||||
async def process_mm_data_async(
|
||||
self,
|
||||
- image_data: Optional[ImageDataItem | List[ImageDataItem]],
|
||||
+ image_data: Optional[ImageDataInputItem | List[ImageDataInputItem]],
|
||||
input_text: str | List[int],
|
||||
request_obj: GenerateReqInput | EmbeddingReqInput,
|
||||
max_req_input_len: int,
|
||||
|
||||
Reference in New Issue
Block a user