Frontend: better error message handling for FINISH_ABORT in scheduler.py (#2956)

This commit is contained in:
Chang Su
2025-01-18 19:37:30 -08:00
committed by GitHub
parent 2bd18e2d76
commit 4d4cdb3fe7
5 changed files with 50 additions and 31 deletions

View File

@@ -392,34 +392,33 @@ class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
def test_chat_completion(self):
    """Verify that an over-long multimodal prompt is rejected with a clear error.

    Sends an image-plus-text chat request whose prompt, after multimodal
    token expansion, exceeds the server's context length.  The server is
    expected to reject the request (FINISH_ABORT path) with an
    ``openai.BadRequestError`` whose message explains that the multimodal
    prompt is too long — rather than returning a truncated/aborted
    completion.

    Relies on class fixtures (set up outside this view) providing
    ``self.api_key`` and ``self.base_url`` for a running server.
    """
    client = openai.Client(api_key=self.api_key, base_url=self.base_url)
    # The request itself is expected to fail; capture the exception so the
    # error message can be inspected below.
    with self.assertRaises(openai.BadRequestError) as cm:
        client.chat.completions.create(
            model="default",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
                            },
                        },
                        {
                            "type": "text",
                            "text": "Give a lengthy description of this picture",
                        },
                    ],
                },
            ],
            temperature=0,
        )
    # The scheduler should surface a human-readable reason, not a generic
    # abort: check the specific context-length error message is present.
    self.assertIn(
        "Multimodal prompt is too long after expanding multimodal tokens.",
        str(cm.exception),
    )
class TestMllamaServer(TestOpenAIVisionServer):