diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index d20365722..9dd1dbc66 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -89,6 +89,25 @@ jobs: cd test/srt python3 run_suite.py --suite per-commit-2-gpu + unittest-test-backend-4-gpu: + if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && + github.event.pull_request.draft == false + needs: [unit-test-frontend, unit-test-backend-2-gpu] + runs-on: 4-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + bash scripts/ci_install_dependency.sh + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite per-commit-4-gpu + unittest-test-backend-8-gpu: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 7db27a432..73108f073 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -81,7 +81,8 @@ suites = { TestFile("test_vertex_endpoint.py", 31), TestFile("test_vision_chunked_prefill.py", 175), TestFile("test_vlm_accuracy.py", 60), - TestFile("test_vision_openai_server.py", 637), + TestFile("test_vision_openai_server_a.py", 700), + TestFile("test_vision_openai_server_b.py", 700), TestFile("test_w8a8_quantization.py", 46), TestFile("models/lora/test_lora_cuda_graph.py", 250), ], @@ -104,17 +105,19 @@ suites = { "per-commit-2-gpu-amd": [ TestFile("test_mla_tp.py", 170), ], + "per-commit-4-gpu": [ + TestFile("test_local_attn.py", 250), + TestFile("test_pp_single_node.py", 150), + ], "per-commit-8-gpu": [ # Disabled deepep tests temporarily because it takes too much time. # TODO: re-enable them after reducing the test time with compilation cache and smaller models. 
# TestFile("test_deepep_intranode.py", 50), # TestFile("test_deepep_low_latency.py", 50), # TestFile("test_moe_deepep_eval_accuracy_large.py", 250), - TestFile("test_disaggregation.py", 210), - TestFile("test_local_attn.py", 250), + # TestFile("test_disaggregation.py", 210), # disabled since we have different_tp test TestFile("test_disaggregation_different_tp.py", 210), TestFile("test_full_deepseek_v3.py", 250), - TestFile("test_pp_single_node.py", 150), ], "per-commit-8-gpu-amd": [ TestFile("test_full_deepseek_v3.py", 250), diff --git a/test/srt/test_pp_single_node.py b/test/srt/test_pp_single_node.py index 3f95271ee..b7fdae2d6 100644 --- a/test/srt/test_pp_single_node.py +++ b/test/srt/test_pp_single_node.py @@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase): "--tp-size", 2, "--pp-size", - 4, + 2, "--chunked-prefill-size", 256, ], diff --git a/test/srt/test_vision_openai_server_a.py b/test/srt/test_vision_openai_server_a.py new file mode 100644 index 000000000..4fef381fd --- /dev/null +++ b/test/srt/test_vision_openai_server_a.py @@ -0,0 +1,187 @@ +""" +Usage: +python3 -m unittest test_vision_openai_server_a.TestQwen2VLServer.test_mixed_batch +python3 -m unittest test_vision_openai_server_a.TestQwen2VLServer.test_multi_images_chat_completion +""" + +from test_vision_openai_server_common import * + +from sglang.srt.utils import kill_process_tree +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + popen_launch_server, +) + + +class TestQwen2VLServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "Qwen/Qwen2-VL-7B-Instruct" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, + other_args=[ + "--mem-fraction-static", + "0.4", + ], + ) + cls.base_url += "/v1" + + +class TestQwen2_5_VLServer(TestOpenAIVisionServer): 
+ @classmethod + def setUpClass(cls): + cls.model = "Qwen/Qwen2.5-VL-7B-Instruct" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, + other_args=[ + "--mem-fraction-static", + "0.4", + ], + ) + cls.base_url += "/v1" + + +class TestVLMContextLengthIssue(CustomTestCase): + @classmethod + def setUpClass(cls): + cls.model = "Qwen/Qwen2-VL-7B-Instruct" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, + other_args=[ + "--context-length", + "300", + "--mem-fraction-static=0.80", + ], + ) + cls.base_url += "/v1" + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_single_image_chat_completion(self): + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + with self.assertRaises(openai.BadRequestError) as cm: + client.chat.completions.create( + model="default", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": IMAGE_MAN_IRONING_URL}, + }, + { + "type": "text", + "text": "Give a lengthy description of this picture", + }, + ], + }, + ], + temperature=0, + ) + + # context length is checked first, then max_req_input_len, which is calculated from the former + assert ( + "Multimodal prompt is too long after expanding multimodal tokens." 
+ in str(cm.exception) + or "is longer than the model's context length" in str(cm.exception) + ) + + +class TestMllamaServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +class TestMinicpmvServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "openbmb/MiniCPM-V-2_6" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--mem-fraction-static", + "0.4", + ], + ) + cls.base_url += "/v1" + + +class TestInternVL2_5Server(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "OpenGVLab/InternVL2_5-2B" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=["--trust-remote-code"], + ) + cls.base_url += "/v1" + + +class TestMinicpmoServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "openbmb/MiniCPM-o-2_6" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--mem-fraction-static", + "0.7", + ], + ) + cls.base_url += "/v1" + + def test_audio_chat_completion(self): + self._test_audio_speech_completion() + self._test_audio_ambient_completion() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_vision_openai_server_b.py 
b/test/srt/test_vision_openai_server_b.py new file mode 100644 index 000000000..30c9808d3 --- /dev/null +++ b/test/srt/test_vision_openai_server_b.py @@ -0,0 +1,200 @@ +from test_vision_openai_server_common import * + +from sglang.srt.utils import kill_process_tree +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + popen_launch_server, +) + + +class TestPixtralServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "mistral-community/pixtral-12b" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--mem-fraction-static", + "0.73", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +class TestMistral3_1Server(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--mem-fraction-static", + "0.8", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +class TestDeepseekVL2Server(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "deepseek-ai/deepseek-vl2-small" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--context-length", + "4096", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +class TestDeepseekVL2TinyServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "deepseek-ai/deepseek-vl2-tiny" + 
cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--context-length", + "4096", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +class TestJanusProServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "deepseek-ai/Janus-Pro-7B" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--mem-fraction-static", + "0.4", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + def test_single_image_chat_completion(self): + # Skip this test because it is flaky + pass + + +## Skip for ci test +# class TestLlama4Server(TestOpenAIVisionServer): +# @classmethod +# def setUpClass(cls): +# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" +# cls.base_url = DEFAULT_URL_FOR_TEST +# cls.api_key = "sk-123456" +# cls.process = popen_launch_server( +# cls.model, +# cls.base_url, +# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, +# other_args=[ +# "--chat-template", +# "llama-4", +# "--mem-fraction-static", +# "0.8", +# "--tp-size=8", +# "--context-length=8192", +# ], +# ) +# cls.base_url += "/v1" + +# def test_video_chat_completion(self): +# pass + + +class TestGemma3itServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "google/gemma-3-4b-it" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--mem-fraction-static", + "0.75", + "--enable-multimodal", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +class 
TestKimiVLServer(TestOpenAIVisionServer): + @classmethod + def setUpClass(cls): + cls.model = "moonshotai/Kimi-VL-A3B-Instruct" + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--trust-remote-code", + "--context-length", + "4096", + "--dtype", + "bfloat16", + ], + ) + cls.base_url += "/v1" + + def test_video_chat_completion(self): + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server_common.py similarity index 60% rename from test/srt/test_vision_openai_server.py rename to test/srt/test_vision_openai_server_common.py index 895e19f5a..a10605ae5 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server_common.py @@ -1,9 +1,3 @@ -""" -Usage: -python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch -python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion -""" - import base64 import io import json @@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase): def test_audio_chat_completion(self): pass - - -class TestQwen2VLServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen2-VL-7B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--mem-fraction-static", - "0.4", - ], - ) - cls.base_url += "/v1" - - -class TestQwen2_5_VLServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen2.5-VL-7B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - 
api_key=cls.api_key, - other_args=[ - "--mem-fraction-static", - "0.4", - ], - ) - cls.base_url += "/v1" - - -class TestVLMContextLengthIssue(CustomTestCase): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen2-VL-7B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--context-length", - "300", - "--mem-fraction-static=0.80", - ], - ) - cls.base_url += "/v1" - - @classmethod - def tearDownClass(cls): - kill_process_tree(cls.process.pid) - - def test_single_image_chat_completion(self): - client = openai.Client(api_key=self.api_key, base_url=self.base_url) - - with self.assertRaises(openai.BadRequestError) as cm: - client.chat.completions.create( - model="default", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": {"url": IMAGE_MAN_IRONING_URL}, - }, - { - "type": "text", - "text": "Give a lengthy description of this picture", - }, - ], - }, - ], - temperature=0, - ) - - # context length is checked first, then max_req_input_len, which is calculated from the former - assert ( - "Multimodal prompt is too long after expanding multimodal tokens." 
- in str(cm.exception) - or "is longer than the model's context length" in str(cm.exception) - ) - - -class TestMllamaServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -class TestMinicpmvServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "openbmb/MiniCPM-V-2_6" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.4", - ], - ) - cls.base_url += "/v1" - - -class TestInternVL2_5Server(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "OpenGVLab/InternVL2_5-2B" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=["--trust-remote-code"], - ) - cls.base_url += "/v1" - - -class TestMinicpmoServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "openbmb/MiniCPM-o-2_6" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.7", - ], - ) - cls.base_url += "/v1" - - def test_audio_chat_completion(self): - self._test_audio_speech_completion() - self._test_audio_ambient_completion() - - -class TestPixtralServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "mistral-community/pixtral-12b" - 
cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.73", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -class TestMistral3_1Server(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.8", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -class TestDeepseekVL2Server(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "deepseek-ai/deepseek-vl2-small" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--context-length", - "4096", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -class TestDeepseekVL2TinyServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "deepseek-ai/deepseek-vl2-tiny" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--context-length", - "4096", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -class TestJanusProServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "deepseek-ai/Janus-Pro-7B" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = 
popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.4", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - def test_single_image_chat_completion(self): - # Skip this test because it is flaky - pass - - -## Skip for ci test -# class TestLlama4Server(TestOpenAIVisionServer): -# @classmethod -# def setUpClass(cls): -# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" -# cls.base_url = DEFAULT_URL_FOR_TEST -# cls.api_key = "sk-123456" -# cls.process = popen_launch_server( -# cls.model, -# cls.base_url, -# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, -# other_args=[ -# "--chat-template", -# "llama-4", -# "--mem-fraction-static", -# "0.8", -# "--tp-size=8", -# "--context-length=8192", -# ], -# ) -# cls.base_url += "/v1" - -# def test_video_chat_completion(self): -# pass - - -class TestGemma3itServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "google/gemma-3-4b-it" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.75", - "--enable-multimodal", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -class TestKimiVLServer(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "moonshotai/Kimi-VL-A3B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--context-length", - "4096", - "--dtype", - "bfloat16", - ], - ) - cls.base_url += "/v1" - - def test_video_chat_completion(self): - pass - - -if __name__ == "__main__": - unittest.main()