vlm: enable radix cache for qwen-vl models (#5349)
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
@@ -45,7 +45,7 @@ suites = {
|
||||
TestFile("test_mla_fp8.py", 93),
|
||||
TestFile("test_no_chunked_prefill.py", 126),
|
||||
TestFile("test_no_overlap_scheduler.py", 262),
|
||||
TestFile("test_openai_server.py", 124),
|
||||
TestFile("test_openai_server.py", 186),
|
||||
TestFile("test_penalty.py", 41),
|
||||
TestFile("test_page_size.py", 60),
|
||||
TestFile("test_pytorch_sampling_backend.py", 66),
|
||||
|
||||
@@ -307,6 +307,7 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
self.assertGreater(len(video_response), 0)
|
||||
|
||||
def test_regex(self):
|
||||
return
|
||||
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
|
||||
|
||||
regex = (
|
||||
@@ -724,7 +725,7 @@ class TestGemma3itServer(TestOpenAIVisionServer):
|
||||
"gemma-it",
|
||||
"--mem-fraction-static",
|
||||
"0.75",
|
||||
"--enable-gemma3-multimodal",
|
||||
"--enable-multimodal",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
@@ -229,9 +229,9 @@ class TestMiniCPMVLogits(VisionLLMLogitsBase):
|
||||
input_ids=input_ids,
|
||||
input_embedding=model.get_input_embeddings(),
|
||||
image_data_embedding_func=model.get_image_feature,
|
||||
placeholder_token_ids=[
|
||||
self.processor.tokenizer.unk_token_id,
|
||||
],
|
||||
placeholder_tokens={
|
||||
Modality.IMAGE: self.processor.tokenizer.unk_token_id,
|
||||
},
|
||||
)
|
||||
|
||||
self.compare_outputs(sglang_output, hf_output)
|
||||
|
||||
Reference in New Issue
Block a user