improve the threshold and ports in tests (#1215)

This commit is contained in:
Mingyi
2024-08-25 19:02:08 -07:00
committed by GitHub
parent d3efcb3930
commit 158e8f1e2d
18 changed files with 122 additions and 86 deletions

View File

@@ -23,18 +23,14 @@ from sglang.utils import get_exception_traceback
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct" DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157 DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157" DEFAULT_URL_FOR_TEST = "http://127.0.0.1:6157"
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:9157"
else: else:
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:1157" DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 1157
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:1257" DEFAULT_URL_FOR_TEST = "http://127.0.0.1:2157"
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:1357"
DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:1457"
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None): def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):

View File

@@ -7,7 +7,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=( other_args=(
"--random-seed", "--random-seed",
"0", "0",

View File

@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -20,11 +21,11 @@ class TestChunkedPrefill(unittest.TestCase):
other_args += ["--enable-mixed-chunk"] other_args += ["--enable-mixed-chunk"]
model = DEFAULT_MODEL_NAME_FOR_TEST model = DEFAULT_MODEL_NAME_FOR_TEST
base_url = DEFAULT_URL_FOR_UNIT_TEST base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server( process = popen_launch_server(
model, model,
base_url, base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args, other_args=other_args,
) )

View File

@@ -4,17 +4,24 @@ import openai
from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
class TestOpenAIServer(unittest.TestCase): class TestOpenAIServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "intfloat/e5-mistral-7b-instruct" cls.model = "intfloat/e5-mistral-7b-instruct"
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, api_key=cls.api_key cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
) )
cls.base_url += "/v1" cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(cls.model) cls.tokenizer = get_tokenizer(cls.model)

View File

@@ -5,8 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_ACCURACY_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--log-level-http", "warning"], other_args=["--log-level-http", "warning"],
) )

View File

@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_ACCURACY_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--log-level-http", "warning", "--chunked-prefill-size", "256"], other_args=["--log-level-http", "warning", "--chunked-prefill-size", "256"],
) )

View File

@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_ACCURACY_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[ other_args=[
"--log-level-http", "--log-level-http",
"warning", "warning",

View File

@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):

View File

@@ -10,7 +10,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key, api_key=cls.api_key,
other_args=("--max-total-token", "1024"), other_args=("--max-total-token", "1024"),
env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ}, env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ},

View File

@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_MOE_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -25,9 +26,12 @@ class TestServingThroughput(unittest.TestCase):
other_args.append("--enable-p2p-check") other_args.append("--enable-p2p-check")
model = DEFAULT_MOE_MODEL_NAME_FOR_TEST model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
base_url = DEFAULT_URL_FOR_MOE_TEST base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server( process = popen_launch_server(
model, base_url, timeout=300, other_args=other_args model,
base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args,
) )
# Run benchmark # Run benchmark
@@ -72,8 +76,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 950, H100 (SMX): 1800
assert res["output_throughput"] > 910 assert res["output_throughput"] > 1750
def test_default_without_radix_cache(self): def test_default_without_radix_cache(self):
res = self.run_test( res = self.run_test(
@@ -83,19 +87,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 950, H100 (SMX): 1900
assert res["output_throughput"] > 910 assert res["output_throughput"] > 1850
def test_default_without_chunked_prefill(self):
res = self.run_test(
disable_radix_cache=ServerArgs.disable_radix_cache,
disable_flashinfer=ServerArgs.disable_flashinfer,
chunked_prefill_size=-1,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
print(res["output_throughput"])
def test_all_cases(self): def test_all_cases(self):
for disable_radix_cache in [False, True]: for disable_radix_cache in [False, True]:

View File

@@ -8,7 +8,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, api_key=cls.api_key cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
) )
cls.base_url += "/v1" cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST) cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST)

View File

@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_E2E_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -23,9 +24,12 @@ class TestServingThroughput(unittest.TestCase):
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)]) other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
model = DEFAULT_MODEL_NAME_FOR_TEST model = DEFAULT_MODEL_NAME_FOR_TEST
base_url = DEFAULT_URL_FOR_E2E_TEST base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server( process = popen_launch_server(
model, base_url, timeout=300, other_args=other_args model,
base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args,
) )
# Run benchmark # Run benchmark
@@ -70,8 +74,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 1450, H100 (SMX): 2550
assert res["output_throughput"] > 1400 assert res["output_throughput"] > 2500
def test_default_without_radix_cache(self): def test_default_without_radix_cache(self):
res = self.run_test( res = self.run_test(
@@ -81,8 +85,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 1500, H100 (SMX): 2850
assert res["output_throughput"] > 1450 assert res["output_throughput"] > 2800
def test_default_without_chunked_prefill(self): def test_default_without_chunked_prefill(self):
res = self.run_test( res = self.run_test(
@@ -92,8 +96,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 1450, H100 (SMX): 2550
assert res["output_throughput"] > 1400 assert res["output_throughput"] > 2500
def test_all_cases(self): def test_all_cases(self):
for disable_radix_cache in [False, True]: for disable_radix_cache in [False, True]:

View File

@@ -6,7 +6,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"] cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--skip-tokenizer-init"],
) )
@classmethod @classmethod

View File

@@ -6,7 +6,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):

View File

@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"] cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--enable-torch-compile"],
) )
@classmethod @classmethod

View File

@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, other_args=["--disable-flashinfer"] cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--disable-flashinfer"],
) )
@classmethod @classmethod

View File

@@ -6,7 +6,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
@@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):

View File

@@ -11,19 +11,23 @@ from decord import VideoReader, cpu
from PIL import Image from PIL import Image
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
class TestOpenAIVisionServer(unittest.TestCase): class TestOpenAIVisionServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov" cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key, api_key=cls.api_key,
other_args=[ other_args=[
"--chat-template", "--chat-template",
@@ -67,7 +71,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.choices[0].message.role == "assistant" assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content text = response.choices[0].message.content
assert isinstance(text, str) assert isinstance(text, str)
assert "logo" in text, text assert "man" in text or "cab" in text, text
assert response.id assert response.id
assert response.created assert response.created
assert response.usage.prompt_tokens > 0 assert response.usage.prompt_tokens > 0
@@ -86,18 +90,19 @@ class TestOpenAIVisionServer(unittest.TestCase):
{ {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
}, },
}, },
{ {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png" "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
}, },
}, },
{ {
"type": "text", "type": "text",
"text": "I have shown you two images. Please describe the two images to me.", "text": "I have two very different images. They are not related at all. "
"Please describe the first image in one sentence, and then describe the second image in another sentence.",
}, },
], ],
}, },
@@ -108,8 +113,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.choices[0].message.role == "assistant" assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content text = response.choices[0].message.content
assert isinstance(text, str) assert isinstance(text, str)
print(text)
assert "man" in text or "cab" in text, text assert "man" in text or "cab" in text, text
assert "logo" in text, text # assert "logo" in text, text
assert response.id assert response.id
assert response.created assert response.created
assert response.usage.prompt_tokens > 0 assert response.usage.prompt_tokens > 0