Support Phi-4 Multi-Modal (text + vision only) (#6494)

This commit is contained in:
Lifu Huang
2025-05-24 21:43:38 -07:00
committed by GitHub
parent 681e7af32b
commit 022012aae8
8 changed files with 650 additions and 6 deletions

View File

@@ -196,5 +196,31 @@ class TestKimiVLServer(TestOpenAIVisionServer):
pass
class TestPhi4MMServer(TestOpenAIVisionServer):
    """Vision-server test suite for microsoft/Phi-4-multimodal-instruct.

    Inherits the shared image chat-completion tests from
    TestOpenAIVisionServer; video and multi-image cases are disabled
    below because the model integration does not support them yet.
    """

    @classmethod
    def setUpClass(cls):
        cls.model = "microsoft/Phi-4-multimodal-instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        # Build the launch flags up front; the reduced static memory
        # fraction leaves headroom for the multimodal components.
        launch_flags = [
            "--trust-remote-code",
            "--mem-fraction-static",
            "0.75",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=launch_flags,
        )
        cls.base_url += "/v1"

    def test_video_chat_completion(self):
        # Video input is not supported for this model; skip the inherited test.
        pass

    def test_multi_images_chat_completion(self):
        # TODO (lifuhuang): support LoRA to enable Phi4MM multi-image understanding capability.
        pass
# Run the suite only when this file is executed directly, not on import.
if __name__ == "__main__":
    unittest.main()