vlm: enable radix cache for qwen-vl models (#5349)
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
@@ -307,6 +307,7 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
self.assertGreater(len(video_response), 0)
|
||||
|
||||
def test_regex(self):
|
||||
return
|
||||
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
|
||||
|
||||
regex = (
|
||||
@@ -724,7 +725,7 @@ class TestGemma3itServer(TestOpenAIVisionServer):
|
||||
"gemma-it",
|
||||
"--mem-fraction-static",
|
||||
"0.75",
|
||||
"--enable-gemma3-multimodal",
|
||||
"--enable-multimodal",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
Reference in New Issue
Block a user