[Fix] Fixing the multi-images error for llava-onevision (#1205)
This commit is contained in:
committed by
GitHub
parent
bc4c7a3545
commit
66e7dcaf70
@@ -78,6 +78,51 @@ def image_stream_request_test(client):
|
|||||||
print("-" * 30)
|
print("-" * 30)
|
||||||
|
|
||||||
|
|
||||||
|
def multi_image_stream_request_test(client):
    """Stream a chat completion over two images and echo the reply to stdout.

    Sends a single user message made of two ``image_url`` parts followed by a
    text prompt, requests a streamed response, and writes every content delta
    to ``sys.stdout`` as soon as it arrives. A header line is printed before
    the request and a 30-dash separator after the stream is exhausted.

    Args:
        client: Object exposing ``chat.completions.create(...)``. With
            ``stream=True`` the call must return an iterable of chunks whose
            ``choices[0].delta.content`` is either a string or ``None``.
            ``main()`` passes the result of ``create_openai_client``.

    Returns:
        None. The streamed text is only written to stdout.
    """
    print(
        "----------------------Multi-Images Stream Request Test----------------------"
    )
    stream_request = client.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
                        },
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
                        },
                    },
                    {
                        "type": "text",
                        "text": "I have shown you two images. Please describe the two images to me.",
                    },
                ],
            },
        ],
        temperature=0.7,
        max_tokens=1024,
        stream=True,
    )

    for chunk in stream_request:
        # A streamed chunk may carry an empty ``choices`` list (for example a
        # usage-only or keep-alive chunk); indexing it would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        # Role-only and final chunks have no text payload.
        if content is not None:
            sys.stdout.write(content)
            # Flush per delta so the reply appears incrementally.
            sys.stdout.flush()

    print("-" * 30)
|
||||||
|
|
||||||
|
|
||||||
def video_stream_request_test(client, video_path):
|
def video_stream_request_test(client, video_path):
|
||||||
print("------------------------Video Stream Request Test----------------------")
|
print("------------------------Video Stream Request Test----------------------")
|
||||||
messages = prepare_video_messages(video_path)
|
messages = prepare_video_messages(video_path)
|
||||||
@@ -209,6 +254,7 @@ def main():
|
|||||||
client = create_openai_client("http://127.0.0.1:30000/v1")
|
client = create_openai_client("http://127.0.0.1:30000/v1")
|
||||||
|
|
||||||
image_stream_request_test(client)
|
image_stream_request_test(client)
|
||||||
|
multi_image_stream_request_test(client)
|
||||||
video_stream_request_test(client, video_path)
|
video_stream_request_test(client, video_path)
|
||||||
image_speed_test(client)
|
image_speed_test(client)
|
||||||
video_speed_test(client, video_path)
|
video_speed_test(client, video_path)
|
||||||
|
|||||||
@@ -744,7 +744,9 @@ def get_pixel_values(
|
|||||||
image,
|
image,
|
||||||
tuple(int(x * 255) for x in processor.image_processor.image_mean),
|
tuple(int(x * 255) for x in processor.image_processor.image_mean),
|
||||||
)
|
)
|
||||||
pixel_values = processor.image_processor(image)["pixel_values"][0]
|
pixel_values = processor.image_processor(image.convert("RGB"))[
|
||||||
|
"pixel_values"
|
||||||
|
][0]
|
||||||
elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
|
elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
|
||||||
pixel_values = process_anyres_image(
|
pixel_values = process_anyres_image(
|
||||||
image, processor.image_processor, image_grid_pinpoints
|
image, processor.image_processor, image_grid_pinpoints
|
||||||
|
|||||||
@@ -74,6 +74,48 @@ class TestOpenAIVisionServer(unittest.TestCase):
|
|||||||
assert response.usage.completion_tokens > 0
|
assert response.usage.completion_tokens > 0
|
||||||
assert response.usage.total_tokens > 0
|
assert response.usage.total_tokens > 0
|
||||||
|
|
||||||
|
def test_mult_images_chat_completion(self):
    """Check that a chat completion over two images mentions both of them.

    Builds one user message with two ``image_url`` parts and a text prompt,
    sends it with ``temperature=0`` to the server at ``self.base_url``, and
    asserts on the assistant reply and the reported token usage.
    """
    client = openai.Client(api_key=self.api_key, base_url=self.base_url)

    # The two images, in the order they are presented to the model.
    image_urls = (
        "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png",
        "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
    )
    user_content = [
        {"type": "image_url", "image_url": {"url": url}} for url in image_urls
    ]
    user_content.append(
        {
            "type": "text",
            "text": "I have shown you two images. Please describe the two images to me.",
        }
    )

    response = client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": user_content}],
        temperature=0,
    )

    reply = response.choices[0].message
    assert reply.role == "assistant"
    text = reply.content
    assert isinstance(text, str)
    # The reply must reference both pictures: one keyword check per image,
    # with the text attached as the failure message.
    assert "man" in text or "cab" in text, text
    assert "logo" in text, text
    assert response.id
    assert response.created
    usage = response.usage
    assert usage.prompt_tokens > 0
    assert usage.completion_tokens > 0
    assert usage.total_tokens > 0
|
||||||
|
|
||||||
def prepare_video_messages(self, video_path):
|
def prepare_video_messages(self, video_path):
|
||||||
max_frames_num = 32
|
max_frames_num = 32
|
||||||
vr = VideoReader(video_path, ctx=cpu(0))
|
vr = VideoReader(video_path, ctx=cpu(0))
|
||||||
|
|||||||
Reference in New Issue
Block a user