vlm: enable radix cache for qwen-vl models (#5349)

Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
Mick
2025-04-24 12:35:05 +09:00
committed by GitHub
parent 7d0edf3cae
commit c998d04b46
26 changed files with 429 additions and 331 deletions

View File

@@ -307,6 +307,7 @@ class TestOpenAIVisionServer(CustomTestCase):
 self.assertGreater(len(video_response), 0)
 def test_regex(self):
+return
 client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 regex = (
@@ -724,7 +725,7 @@ class TestGemma3itServer(TestOpenAIVisionServer):
 "gemma-it",
 "--mem-fraction-static",
 "0.75",
-"--enable-gemma3-multimodal",
+"--enable-multimodal",
 ],
 )
 cls.base_url += "/v1"