Support precomputed multimodal features for Qwen-VL and Gemma3 models. (#6136)

Co-authored-by: Yury Sulsky <ysulsky@tesla.com>
This commit is contained in:
Yury Sulsky
2025-05-16 12:26:15 -07:00
committed by GitHub
parent c23a7072b6
commit f19a9204cd
14 changed files with 592 additions and 125 deletions

View File

@@ -47,6 +47,7 @@ from sglang.srt.managers.io_struct import (
EmbeddingReqInput,
GenerateReqInput,
GetWeightsByNameReqInput,
+ImageDataItem,
InitWeightsUpdateGroupReqInput,
ReleaseMemoryOccupationReqInput,
ResumeMemoryOccupationReqInput,
@@ -150,9 +151,9 @@ class Engine(EngineBase):
# See also python/sglang/srt/utils.py:load_image for more details.
image_data: Optional[
Union[
-List[List[Union[Image, str]]],
-List[Union[Image, str]],
-Union[Image, str],
+List[List[ImageDataItem]],
+List[ImageDataItem],
+ImageDataItem,
]
] = None,
return_logprob: Optional[Union[List[bool], bool]] = False,
@@ -221,9 +222,9 @@ class Engine(EngineBase):
# See also python/sglang/srt/utils.py:load_image for more details.
image_data: Optional[
Union[
-List[List[Union[Image, str]]],
-List[Union[Image, str]],
-Union[Image, str],
+List[List[ImageDataItem]],
+List[ImageDataItem],
+ImageDataItem,
]
] = None,
return_logprob: Optional[Union[List[bool], bool]] = False,