Frontend: better error message handling for FINISH_ABORT in scheduler.py (#2956)

This commit is contained in:
Chang Su
2025-01-18 19:37:30 -08:00
committed by GitHub
parent 2bd18e2d76
commit 4d4cdb3fe7
5 changed files with 50 additions and 31 deletions

View File

@@ -392,34 +392,33 @@ class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
def test_chat_completion(self):
    """Verify that an over-long multimodal prompt is rejected with a clear error.

    Sends an image-plus-text chat request whose prompt, after multimodal
    token expansion, exceeds the server's context length.  The server is
    expected to reject the request (FINISH_ABORT path) with an
    ``openai.BadRequestError`` whose message explains that the multimodal
    prompt is too long — rather than returning a truncated/aborted
    completion.

    Relies on class fixtures (set up outside this view) providing
    ``self.api_key`` and ``self.base_url`` for a running server.
    """
    client = openai.Client(api_key=self.api_key, base_url=self.base_url)
    # The request itself is expected to fail; capture the exception so the
    # error message can be inspected below.
    with self.assertRaises(openai.BadRequestError) as cm:
        client.chat.completions.create(
            model="default",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
                            },
                        },
                        {
                            "type": "text",
                            "text": "Give a lengthy description of this picture",
                        },
                    ],
                },
            ],
            temperature=0,
        )
    # The scheduler should surface a human-readable reason, not a generic
    # abort: check the specific context-length error message is present.
    self.assertIn(
        "Multimodal prompt is too long after expanding multimodal tokens.",
        str(cm.exception),
    )
class TestMllamaServer(TestOpenAIVisionServer):