refactor: move image processors to separate files (#4229)

Author: Mick
Date: 2025-03-12 03:35:35 +08:00
Committed by: GitHub
Parent: 0f2a2e3c19
Commit: ff2ce0b86f
22 changed files with 1085 additions and 955 deletions


@@ -193,10 +193,10 @@ class TestMiniCPMVLogits(VisionLLMLogitsBase):
             **{
                 "pixel_values": [inputs["pixel_values"]],
                 "tgt_sizes": [inputs["tgt_sizes"]],
-                "im_start_id": [self.tokenizer.im_start_id],
-                "im_end_id": [self.tokenizer.im_end_id],
-                "slice_start_id": [self.tokenizer.slice_start_id],
-                "slice_end_id": [self.tokenizer.slice_end_id],
+                "im_start_id": self.tokenizer.im_start_id,
+                "im_end_id": self.tokenizer.im_end_id,
+                "slice_start_id": self.tokenizer.slice_start_id,
+                "slice_end_id": self.tokenizer.slice_end_id,
             },
         )
         (sglang_output, _) = model.get_embedding(
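
Note: the hunk above drops the list wrapping around the tokenizer's special-token ids. A self-contained sketch of the convention shift (the stand-in tokenizer class and id values are illustrative, not from the diff):

    # Stand-in for the test's self.tokenizer; the id values are made up.
    class FakeTokenizer:
        im_start_id = 101
        im_end_id = 102

    tokenizer = FakeTokenizer()
    old_kwargs = {"im_start_id": [tokenizer.im_start_id]}  # before: [101]
    new_kwargs = {"im_start_id": tokenizer.im_start_id}    # after: 101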


@@ -47,7 +47,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)

-    def test_chat_completion(self):
+    def test_single_image_chat_completion(self):
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)

         response = client.chat.completions.create(
@@ -75,7 +75,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        assert "man" in text or "cab" in text, text
+        assert "man" in text or "person" in text, text
+        assert "cab" in text or "taxi" in text or "SUV" in text, text
+        assert "iron" in text, text
         assert response.id
         assert response.created
         assert response.usage.prompt_tokens > 0
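
Note: for reference, a minimal single-image request of the kind the renamed test sends could look like the following; the model name, image URL, and prompt are illustrative stand-ins, not values from the diff.

    import openai

    # Point the client at the locally launched server; "sk-..." stands in
    # for the test API key configured in setUpClass.
    client = openai.Client(api_key="sk-...", base_url="http://127.0.0.1:30000/v1")
    response = client.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url",
                     "image_url": {"url": "https://example.com/image.png"}},
                    {"type": "text", "text": "Describe this image."},
                ],
            }
        ],
        temperature=0,
    )
    print(f"LLM response: {response.choices[0].message.content}")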
@@ -169,7 +171,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        print(text)
+        print(f"LLM response: {text}")
         assert "man" in text or "cab" in text or "SUV" in text or "taxi" in text, text
         assert "logo" in text or '"S"' in text or "SG" in text, text
         assert response.id
@@ -379,6 +381,8 @@ class TestQWen2VLServer(TestOpenAIVisionServer):
             other_args=[
                 "--chat-template",
                 "qwen2-vl",
+                "--chunked-prefill-size",
+                "10000",
             ],
         )
         cls.base_url += "/v1"
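
Note: the new --chunked-prefill-size flag bounds how many prompt tokens the server prefills per forward pass, which keeps long multimodal prompts from being processed in one oversized chunk. As a rough sketch of an equivalent standalone launch (the launcher module is sglang's standard CLI; the flag values are the ones added above):

    import subprocess

    # Launch the server with the same arguments the test now passes.
    subprocess.run([
        "python", "-m", "sglang.launch_server",
        "--model-path", "Qwen/Qwen2-VL-7B-Instruct",
        "--chat-template", "qwen2-vl",
        "--chunked-prefill-size", "10000",  # cap on tokens prefilled per chunk
    ])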
@@ -408,7 +412,7 @@ class TestQWen2_5_VLServer(TestOpenAIVisionServer):
         cls.base_url += "/v1"


-class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
+class TestVLMContextLengthIssue(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.model = "Qwen/Qwen2-VL-7B-Instruct"
@@ -433,7 +437,7 @@ class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)

-    def test_chat_completion(self):
+    def test_single_image_chat_completion(self):
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)

         with self.assertRaises(openai.BadRequestError) as cm:
@@ -459,9 +463,11 @@ class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
                 temperature=0,
             )

-        self.assertIn(
-            "Multimodal prompt is too long after expanding multimodal tokens.",
-            str(cm.exception),
+        # context length is checked first, then max_req_input_len, which is calculated from the former
+        assert (
+            "Multimodal prompt is too long after expanding multimodal tokens."
+            in str(cm.exception)
+            or "is longer than the model's context length" in str(cm.exception)
         )
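
Note: a small illustration of the check ordering the new comment describes, with assumed names and values (the real derivation of max_req_input_len lives in the server code, not in this diff):

    # Hypothetical sketch: a request first fails the model's context-length
    # check; only prompts under that limit reach the max_req_input_len check,
    # which is derived from the context length.
    context_len = 32768                      # example model context length
    max_req_input_len = context_len - 1024   # assumed margin, not the real formula

    def check(prompt_len: int) -> str:
        if prompt_len > context_len:
            return "is longer than the model's context length"
        if prompt_len > max_req_input_len:
            return "Multimodal prompt is too long after expanding multimodal tokens."
        return "ok"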