Add 4-GPU runner tests and split existing tests (#6383)
This commit is contained in:
19
.github/workflows/pr-test.yml
vendored
19
.github/workflows/pr-test.yml
vendored
@@ -89,6 +89,25 @@ jobs:
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-2-gpu
|
||||
|
||||
unittest-test-backend-4-gpu:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
needs: [unit-test-frontend, unit-test-backend-2-gpu]
|
||||
runs-on: 4-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
bash scripts/ci_install_dependency.sh
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 20
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-4-gpu
|
||||
|
||||
unittest-test-backend-8-gpu:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
|
||||
@@ -81,7 +81,8 @@ suites = {
|
||||
TestFile("test_vertex_endpoint.py", 31),
|
||||
TestFile("test_vision_chunked_prefill.py", 175),
|
||||
TestFile("test_vlm_accuracy.py", 60),
|
||||
TestFile("test_vision_openai_server.py", 637),
|
||||
TestFile("test_vision_openai_server_a.py", 700),
|
||||
TestFile("test_vision_openai_server_b.py", 700),
|
||||
TestFile("test_w8a8_quantization.py", 46),
|
||||
TestFile("models/lora/test_lora_cuda_graph.py", 250),
|
||||
],
|
||||
@@ -104,17 +105,19 @@ suites = {
|
||||
"per-commit-2-gpu-amd": [
|
||||
TestFile("test_mla_tp.py", 170),
|
||||
],
|
||||
"per-commit-4-gpu": [
|
||||
TestFile("test_local_attn.py", 250),
|
||||
TestFile("test_pp_single_node.py", 150),
|
||||
],
|
||||
"per-commit-8-gpu": [
|
||||
# Disabled deepep tests temporarily because it takes too much time.
|
||||
# TODO: re-enable them after reducing the test time with compilation cache and smaller models.
|
||||
# TestFile("test_deepep_intranode.py", 50),
|
||||
# TestFile("test_deepep_low_latency.py", 50),
|
||||
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
|
||||
TestFile("test_disaggregation.py", 210),
|
||||
TestFile("test_local_attn.py", 250),
|
||||
# TestFile("test_disaggregation.py", 210), # disabled since we have different_tp test
|
||||
TestFile("test_disaggregation_different_tp.py", 210),
|
||||
TestFile("test_full_deepseek_v3.py", 250),
|
||||
TestFile("test_pp_single_node.py", 150),
|
||||
],
|
||||
"per-commit-8-gpu-amd": [
|
||||
TestFile("test_full_deepseek_v3.py", 250),
|
||||
|
||||
@@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase):
|
||||
"--tp-size",
|
||||
2,
|
||||
"--pp-size",
|
||||
4,
|
||||
2,
|
||||
"--chunked-prefill-size",
|
||||
256,
|
||||
],
|
||||
|
||||
187
test/srt/test_vision_openai_server_a.py
Normal file
187
test/srt/test_vision_openai_server_a.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
Usage:
|
||||
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
|
||||
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
|
||||
"""
|
||||
|
||||
from test_vision_openai_server_common import *
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
CustomTestCase,
|
||||
popen_launch_server,
|
||||
)
|
||||
|
||||
|
||||
class TestQwen2VLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=[
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=[
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestVLMContextLengthIssue(CustomTestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=[
|
||||
"--context-length",
|
||||
"300",
|
||||
"--mem-fraction-static=0.80",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
kill_process_tree(cls.process.pid)
|
||||
|
||||
def test_single_image_chat_completion(self):
|
||||
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
|
||||
|
||||
with self.assertRaises(openai.BadRequestError) as cm:
|
||||
client.chat.completions.create(
|
||||
model="default",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": IMAGE_MAN_IRONING_URL},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Give a lengthy description of this picture",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
temperature=0,
|
||||
)
|
||||
|
||||
# context length is checked first, then max_req_input_len, which is calculated from the former
|
||||
assert (
|
||||
"Multimodal prompt is too long after expanding multimodal tokens."
|
||||
in str(cm.exception)
|
||||
or "is longer than the model's context length" in str(cm.exception)
|
||||
)
|
||||
|
||||
|
||||
class TestMllamaServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestMinicpmvServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "openbmb/MiniCPM-V-2_6"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestInternVL2_5Server(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "OpenGVLab/InternVL2_5-2B"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=["--trust-remote-code"],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestMinicpmoServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "openbmb/MiniCPM-o-2_6"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.7",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_audio_chat_completion(self):
|
||||
self._test_audio_speech_completion()
|
||||
self._test_audio_ambient_completion()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
200
test/srt/test_vision_openai_server_b.py
Normal file
200
test/srt/test_vision_openai_server_b.py
Normal file
@@ -0,0 +1,200 @@
|
||||
from test_vision_openai_server_common import *
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
CustomTestCase,
|
||||
popen_launch_server,
|
||||
)
|
||||
|
||||
|
||||
class TestPixtralServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "mistral-community/pixtral-12b"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.73",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestMistral3_1Server(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.8",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestDeepseekVL2Server(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "deepseek-ai/deepseek-vl2-small"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "deepseek-ai/deepseek-vl2-tiny"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestJanusProServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "deepseek-ai/Janus-Pro-7B"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
def test_single_image_chat_completion(self):
|
||||
# Skip this test because it is flaky
|
||||
pass
|
||||
|
||||
|
||||
## Skip for ci test
|
||||
# class TestLlama4Server(TestOpenAIVisionServer):
|
||||
# @classmethod
|
||||
# def setUpClass(cls):
|
||||
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
|
||||
# cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
# cls.api_key = "sk-123456"
|
||||
# cls.process = popen_launch_server(
|
||||
# cls.model,
|
||||
# cls.base_url,
|
||||
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
# other_args=[
|
||||
# "--chat-template",
|
||||
# "llama-4",
|
||||
# "--mem-fraction-static",
|
||||
# "0.8",
|
||||
# "--tp-size=8",
|
||||
# "--context-length=8192",
|
||||
# ],
|
||||
# )
|
||||
# cls.base_url += "/v1"
|
||||
|
||||
# def test_video_chat_completion(self):
|
||||
# pass
|
||||
|
||||
|
||||
class TestGemma3itServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "google/gemma-3-4b-it"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.75",
|
||||
"--enable-multimodal",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestKimiVLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
"--dtype",
|
||||
"bfloat16",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,9 +1,3 @@
|
||||
"""
|
||||
Usage:
|
||||
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
|
||||
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
@@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
|
||||
def test_audio_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestQwen2VLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=[
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=[
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestVLMContextLengthIssue(CustomTestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=[
|
||||
"--context-length",
|
||||
"300",
|
||||
"--mem-fraction-static=0.80",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
kill_process_tree(cls.process.pid)
|
||||
|
||||
def test_single_image_chat_completion(self):
|
||||
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
|
||||
|
||||
with self.assertRaises(openai.BadRequestError) as cm:
|
||||
client.chat.completions.create(
|
||||
model="default",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": IMAGE_MAN_IRONING_URL},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Give a lengthy description of this picture",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
temperature=0,
|
||||
)
|
||||
|
||||
# context length is checked first, then max_req_input_len, which is calculated from the former
|
||||
assert (
|
||||
"Multimodal prompt is too long after expanding multimodal tokens."
|
||||
in str(cm.exception)
|
||||
or "is longer than the model's context length" in str(cm.exception)
|
||||
)
|
||||
|
||||
|
||||
class TestMllamaServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestMinicpmvServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "openbmb/MiniCPM-V-2_6"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestInternVL2_5Server(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "OpenGVLab/InternVL2_5-2B"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=["--trust-remote-code"],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestMinicpmoServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "openbmb/MiniCPM-o-2_6"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.7",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_audio_chat_completion(self):
|
||||
self._test_audio_speech_completion()
|
||||
self._test_audio_ambient_completion()
|
||||
|
||||
|
||||
class TestPixtralServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "mistral-community/pixtral-12b"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.73",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestMistral3_1Server(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.8",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestDeepseekVL2Server(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "deepseek-ai/deepseek-vl2-small"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "deepseek-ai/deepseek-vl2-tiny"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestJanusProServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "deepseek-ai/Janus-Pro-7B"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.4",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
def test_single_image_chat_completion(self):
|
||||
# Skip this test because it is flaky
|
||||
pass
|
||||
|
||||
|
||||
## Skip for ci test
|
||||
# class TestLlama4Server(TestOpenAIVisionServer):
|
||||
# @classmethod
|
||||
# def setUpClass(cls):
|
||||
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
|
||||
# cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
# cls.api_key = "sk-123456"
|
||||
# cls.process = popen_launch_server(
|
||||
# cls.model,
|
||||
# cls.base_url,
|
||||
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
# other_args=[
|
||||
# "--chat-template",
|
||||
# "llama-4",
|
||||
# "--mem-fraction-static",
|
||||
# "0.8",
|
||||
# "--tp-size=8",
|
||||
# "--context-length=8192",
|
||||
# ],
|
||||
# )
|
||||
# cls.base_url += "/v1"
|
||||
|
||||
# def test_video_chat_completion(self):
|
||||
# pass
|
||||
|
||||
|
||||
class TestGemma3itServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "google/gemma-3-4b-it"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--mem-fraction-static",
|
||||
"0.75",
|
||||
"--enable-multimodal",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestKimiVLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
"--dtype",
|
||||
"bfloat16",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
def test_video_chat_completion(self):
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user