[Feat] Add modalities for vision server when handling pixel values for llava (#1346)

This commit is contained in:
Kaichen Zhang - NTU
2024-09-09 17:07:34 +08:00
committed by GitHub
parent 8e6bdf851c
commit 662ecd9368
11 changed files with 40 additions and 2 deletions

View File

@@ -140,12 +140,14 @@ class TestOpenAIVisionServer(unittest.TestCase):
"image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
},
"modalities": "multi-images",
},
{
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
},
"modalities": "multi-images",
},
{
"type": "text",
@@ -192,6 +194,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
frame_format = {
"type": "image_url",
"image_url": {"url": "data:image/jpeg;base64,{}"},
"modalities": "video",
}
for base64_frame in base64_frames: