diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 199ea7c3a..3f25ad560 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -428,7 +428,7 @@ class TokenizerManager: async def _get_pixel_values(self, image_data): if isinstance(image_data, list) and len(image_data) > 0: - return await self._get_pixel_values_internal(image_data[0]) + return await self._get_pixel_values_internal(image_data) elif isinstance(image_data, str): return await self._get_pixel_values_internal(image_data) else: diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index a34571776..0003e4776 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -114,8 +114,8 @@ class TestOpenAIVisionServer(unittest.TestCase): text = response.choices[0].message.content assert isinstance(text, str) print(text) - assert "man" in text or "cab" in text, text - # assert "logo" in text, text + assert "man" in text and "taxi" in text, text + assert "logo" in text, text assert response.id assert response.created assert response.usage.prompt_tokens > 0