Add 4-GPU runner tests and split existing tests (#6383)
This commit is contained in:
19
.github/workflows/pr-test.yml
vendored
19
.github/workflows/pr-test.yml
vendored
@@ -89,6 +89,25 @@ jobs:
|
|||||||
cd test/srt
|
cd test/srt
|
||||||
python3 run_suite.py --suite per-commit-2-gpu
|
python3 run_suite.py --suite per-commit-2-gpu
|
||||||
|
|
||||||
|
unittest-test-backend-4-gpu:
|
||||||
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
|
github.event.pull_request.draft == false
|
||||||
|
needs: [unit-test-frontend, unit-test-backend-2-gpu]
|
||||||
|
runs-on: 4-gpu-runner
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
bash scripts/ci_install_dependency.sh
|
||||||
|
|
||||||
|
- name: Run test
|
||||||
|
timeout-minutes: 20
|
||||||
|
run: |
|
||||||
|
cd test/srt
|
||||||
|
python3 run_suite.py --suite per-commit-4-gpu
|
||||||
|
|
||||||
unittest-test-backend-8-gpu:
|
unittest-test-backend-8-gpu:
|
||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
|
|||||||
@@ -81,7 +81,8 @@ suites = {
|
|||||||
TestFile("test_vertex_endpoint.py", 31),
|
TestFile("test_vertex_endpoint.py", 31),
|
||||||
TestFile("test_vision_chunked_prefill.py", 175),
|
TestFile("test_vision_chunked_prefill.py", 175),
|
||||||
TestFile("test_vlm_accuracy.py", 60),
|
TestFile("test_vlm_accuracy.py", 60),
|
||||||
TestFile("test_vision_openai_server.py", 637),
|
TestFile("test_vision_openai_server_a.py", 700),
|
||||||
|
TestFile("test_vision_openai_server_b.py", 700),
|
||||||
TestFile("test_w8a8_quantization.py", 46),
|
TestFile("test_w8a8_quantization.py", 46),
|
||||||
TestFile("models/lora/test_lora_cuda_graph.py", 250),
|
TestFile("models/lora/test_lora_cuda_graph.py", 250),
|
||||||
],
|
],
|
||||||
@@ -104,17 +105,19 @@ suites = {
|
|||||||
"per-commit-2-gpu-amd": [
|
"per-commit-2-gpu-amd": [
|
||||||
TestFile("test_mla_tp.py", 170),
|
TestFile("test_mla_tp.py", 170),
|
||||||
],
|
],
|
||||||
|
"per-commit-4-gpu": [
|
||||||
|
TestFile("test_local_attn.py", 250),
|
||||||
|
TestFile("test_pp_single_node.py", 150),
|
||||||
|
],
|
||||||
"per-commit-8-gpu": [
|
"per-commit-8-gpu": [
|
||||||
# Disabled deepep tests temporarily because it takes too much time.
|
# Disabled deepep tests temporarily because it takes too much time.
|
||||||
# TODO: re-enable them after reducing the test time with compilation cache and smaller models.
|
# TODO: re-enable them after reducing the test time with compilation cache and smaller models.
|
||||||
# TestFile("test_deepep_intranode.py", 50),
|
# TestFile("test_deepep_intranode.py", 50),
|
||||||
# TestFile("test_deepep_low_latency.py", 50),
|
# TestFile("test_deepep_low_latency.py", 50),
|
||||||
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
|
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
|
||||||
TestFile("test_disaggregation.py", 210),
|
# TestFile("test_disaggregation.py", 210), # disabled since we have different_tp test
|
||||||
TestFile("test_local_attn.py", 250),
|
|
||||||
TestFile("test_disaggregation_different_tp.py", 210),
|
TestFile("test_disaggregation_different_tp.py", 210),
|
||||||
TestFile("test_full_deepseek_v3.py", 250),
|
TestFile("test_full_deepseek_v3.py", 250),
|
||||||
TestFile("test_pp_single_node.py", 150),
|
|
||||||
],
|
],
|
||||||
"per-commit-8-gpu-amd": [
|
"per-commit-8-gpu-amd": [
|
||||||
TestFile("test_full_deepseek_v3.py", 250),
|
TestFile("test_full_deepseek_v3.py", 250),
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase):
|
|||||||
"--tp-size",
|
"--tp-size",
|
||||||
2,
|
2,
|
||||||
"--pp-size",
|
"--pp-size",
|
||||||
4,
|
2,
|
||||||
"--chunked-prefill-size",
|
"--chunked-prefill-size",
|
||||||
256,
|
256,
|
||||||
],
|
],
|
||||||
|
|||||||
187
test/srt/test_vision_openai_server_a.py
Normal file
187
test/srt/test_vision_openai_server_a.py
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
"""
|
||||||
|
Usage:
|
||||||
|
python3 -m unittest test_vision_openai_server_a.TestQwen2VLServer.test_mixed_batch
|
||||||
|
python3 -m unittest test_vision_openai_server_a.TestQwen2VLServer.test_multi_images_chat_completion
|
||||||
|
"""
|
||||||
|
|
||||||
|
from test_vision_openai_server_common import *
|
||||||
|
|
||||||
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestQwen2VLServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
api_key=cls.api_key,
|
||||||
|
other_args=[
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.4",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
|
||||||
|
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
api_key=cls.api_key,
|
||||||
|
other_args=[
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.4",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
|
||||||
|
class TestVLMContextLengthIssue(CustomTestCase):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
api_key=cls.api_key,
|
||||||
|
other_args=[
|
||||||
|
"--context-length",
|
||||||
|
"300",
|
||||||
|
"--mem-fraction-static=0.80",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def tearDownClass(cls):
|
||||||
|
kill_process_tree(cls.process.pid)
|
||||||
|
|
||||||
|
def test_single_image_chat_completion(self):
|
||||||
|
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
|
||||||
|
|
||||||
|
with self.assertRaises(openai.BadRequestError) as cm:
|
||||||
|
client.chat.completions.create(
|
||||||
|
model="default",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": IMAGE_MAN_IRONING_URL},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "Give a lengthy description of this picture",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
temperature=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
# context length is checked first, then max_req_input_len, which is calculated from the former
|
||||||
|
assert (
|
||||||
|
"Multimodal prompt is too long after expanding multimodal tokens."
|
||||||
|
in str(cm.exception)
|
||||||
|
or "is longer than the model's context length" in str(cm.exception)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMllamaServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
api_key=cls.api_key,
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestMinicpmvServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "openbmb/MiniCPM-V-2_6"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.4",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
|
||||||
|
class TestInternVL2_5Server(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "OpenGVLab/InternVL2_5-2B"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=["--trust-remote-code"],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
|
||||||
|
class TestMinicpmoServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "openbmb/MiniCPM-o-2_6"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.7",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_audio_chat_completion(self):
|
||||||
|
self._test_audio_speech_completion()
|
||||||
|
self._test_audio_ambient_completion()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
200
test/srt/test_vision_openai_server_b.py
Normal file
200
test/srt/test_vision_openai_server_b.py
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
from test_vision_openai_server_common import *
|
||||||
|
|
||||||
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPixtralServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "mistral-community/pixtral-12b"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.73",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestMistral3_1Server(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.8",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeepseekVL2Server(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "deepseek-ai/deepseek-vl2-small"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--context-length",
|
||||||
|
"4096",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "deepseek-ai/deepseek-vl2-tiny"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--context-length",
|
||||||
|
"4096",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestJanusProServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "deepseek-ai/Janus-Pro-7B"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.4",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_single_image_chat_completion(self):
|
||||||
|
# Skip this test because it is flaky
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
## Skip for ci test
|
||||||
|
# class TestLlama4Server(TestOpenAIVisionServer):
|
||||||
|
# @classmethod
|
||||||
|
# def setUpClass(cls):
|
||||||
|
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
|
||||||
|
# cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
# cls.api_key = "sk-123456"
|
||||||
|
# cls.process = popen_launch_server(
|
||||||
|
# cls.model,
|
||||||
|
# cls.base_url,
|
||||||
|
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
# other_args=[
|
||||||
|
# "--chat-template",
|
||||||
|
# "llama-4",
|
||||||
|
# "--mem-fraction-static",
|
||||||
|
# "0.8",
|
||||||
|
# "--tp-size=8",
|
||||||
|
# "--context-length=8192",
|
||||||
|
# ],
|
||||||
|
# )
|
||||||
|
# cls.base_url += "/v1"
|
||||||
|
|
||||||
|
# def test_video_chat_completion(self):
|
||||||
|
# pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestGemma3itServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "google/gemma-3-4b-it"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
"0.75",
|
||||||
|
"--enable-multimodal",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiVLServer(TestOpenAIVisionServer):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||||
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
cls.api_key = "sk-123456"
|
||||||
|
cls.process = popen_launch_server(
|
||||||
|
cls.model,
|
||||||
|
cls.base_url,
|
||||||
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--context-length",
|
||||||
|
"4096",
|
||||||
|
"--dtype",
|
||||||
|
"bfloat16",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
def test_video_chat_completion(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -1,9 +1,3 @@
|
|||||||
"""
|
|
||||||
Usage:
|
|
||||||
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
|
|
||||||
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
|
|
||||||
"""
|
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
@@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase):
|
|||||||
|
|
||||||
def test_audio_chat_completion(self):
|
def test_audio_chat_completion(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class TestQwen2VLServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
api_key=cls.api_key,
|
|
||||||
other_args=[
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.4",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
|
|
||||||
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
api_key=cls.api_key,
|
|
||||||
other_args=[
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.4",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
|
|
||||||
class TestVLMContextLengthIssue(CustomTestCase):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
api_key=cls.api_key,
|
|
||||||
other_args=[
|
|
||||||
"--context-length",
|
|
||||||
"300",
|
|
||||||
"--mem-fraction-static=0.80",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def tearDownClass(cls):
|
|
||||||
kill_process_tree(cls.process.pid)
|
|
||||||
|
|
||||||
def test_single_image_chat_completion(self):
|
|
||||||
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
|
|
||||||
|
|
||||||
with self.assertRaises(openai.BadRequestError) as cm:
|
|
||||||
client.chat.completions.create(
|
|
||||||
model="default",
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": [
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {"url": IMAGE_MAN_IRONING_URL},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "text",
|
|
||||||
"text": "Give a lengthy description of this picture",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
temperature=0,
|
|
||||||
)
|
|
||||||
|
|
||||||
# context length is checked first, then max_req_input_len, which is calculated from the former
|
|
||||||
assert (
|
|
||||||
"Multimodal prompt is too long after expanding multimodal tokens."
|
|
||||||
in str(cm.exception)
|
|
||||||
or "is longer than the model's context length" in str(cm.exception)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestMllamaServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
api_key=cls.api_key,
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestMinicpmvServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "openbmb/MiniCPM-V-2_6"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.4",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
|
|
||||||
class TestInternVL2_5Server(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "OpenGVLab/InternVL2_5-2B"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=["--trust-remote-code"],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
|
|
||||||
class TestMinicpmoServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "openbmb/MiniCPM-o-2_6"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.7",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_audio_chat_completion(self):
|
|
||||||
self._test_audio_speech_completion()
|
|
||||||
self._test_audio_ambient_completion()
|
|
||||||
|
|
||||||
|
|
||||||
class TestPixtralServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "mistral-community/pixtral-12b"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.73",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestMistral3_1Server(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.8",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestDeepseekVL2Server(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "deepseek-ai/deepseek-vl2-small"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--context-length",
|
|
||||||
"4096",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "deepseek-ai/deepseek-vl2-tiny"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--context-length",
|
|
||||||
"4096",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestJanusProServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "deepseek-ai/Janus-Pro-7B"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.4",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def test_single_image_chat_completion(self):
|
|
||||||
# Skip this test because it is flaky
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
## Skip for ci test
|
|
||||||
# class TestLlama4Server(TestOpenAIVisionServer):
|
|
||||||
# @classmethod
|
|
||||||
# def setUpClass(cls):
|
|
||||||
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
|
|
||||||
# cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
# cls.api_key = "sk-123456"
|
|
||||||
# cls.process = popen_launch_server(
|
|
||||||
# cls.model,
|
|
||||||
# cls.base_url,
|
|
||||||
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
# other_args=[
|
|
||||||
# "--chat-template",
|
|
||||||
# "llama-4",
|
|
||||||
# "--mem-fraction-static",
|
|
||||||
# "0.8",
|
|
||||||
# "--tp-size=8",
|
|
||||||
# "--context-length=8192",
|
|
||||||
# ],
|
|
||||||
# )
|
|
||||||
# cls.base_url += "/v1"
|
|
||||||
|
|
||||||
# def test_video_chat_completion(self):
|
|
||||||
# pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestGemma3itServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "google/gemma-3-4b-it"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--mem-fraction-static",
|
|
||||||
"0.75",
|
|
||||||
"--enable-multimodal",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestKimiVLServer(TestOpenAIVisionServer):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
|
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
|
||||||
cls.api_key = "sk-123456"
|
|
||||||
cls.process = popen_launch_server(
|
|
||||||
cls.model,
|
|
||||||
cls.base_url,
|
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
other_args=[
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--context-length",
|
|
||||||
"4096",
|
|
||||||
"--dtype",
|
|
||||||
"bfloat16",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
cls.base_url += "/v1"
|
|
||||||
|
|
||||||
def test_video_chat_completion(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
Reference in New Issue
Block a user