ci: add moe test (#1053)
This commit is contained in:
42
.github/workflows/moe-test.yml
vendored
Normal file
42
.github/workflows/moe-test.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# CI workflow that runs the MoE serving-throughput benchmarks from
# test/srt/test_moe_serving_throughput.py.
name: MoE Test

# Run on pushes and pull requests against main that touch the Python package
# or the tests, and allow manual runs from the Actions tab.
on:
  push:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  workflow_dispatch:

# Only one run per ref at a time; a newer run cancels the one in progress.
concurrency:
  group: moe-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  moe-test:
    # Skip push / manual runs outside the upstream repository; pull requests
    # always run.
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    # NOTE(review): "accuracy" is presumably a self-hosted runner label with
    # GPUs and a pre-built venv at $HOME/venv - confirm against runner setup.
    runs-on: accuracy

    steps:
      - name: Checkout code
        # v4 runs on a supported Node runtime; v3 depends on deprecated Node 16.
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          source "$HOME/venv/bin/activate"
          # Make the venv's executables visible to the following steps.
          echo "$HOME/venv/bin" >> "$GITHUB_PATH"

          pip install --upgrade pip
          pip install -e "python[all]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

      - name: Benchmark MOE Serving Throughput
        run: |
          cd test/srt
          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
||||||
@@ -21,7 +21,11 @@ from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
|||||||
from sglang.utils import get_exception_traceback
|
from sglang.utils import get_exception_traceback
|
||||||
|
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
||||||
DEFAULT_URL_FOR_TEST = "http://127.0.0.1:8157"
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
||||||
|
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
|
||||||
|
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
|
||||||
|
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
|
||||||
|
DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:9157"
|
||||||
|
|
||||||
|
|
||||||
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
|
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
|
||||||
|
|||||||
@@ -5,20 +5,19 @@ from sglang.srt.utils import kill_child_process
|
|||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestChunkedPrefill(unittest.TestCase):
|
class TestChunkedPrefill(unittest.TestCase):
|
||||||
|
|
||||||
def run_mmlu(self, disable_radix_cache):
|
def run_mmlu(self, disable_radix_cache):
|
||||||
other_args = ["--chunked-prefill-size", "32"]
|
other_args = ["--chunked-prefill-size", "32"]
|
||||||
if disable_radix_cache:
|
if disable_radix_cache:
|
||||||
other_args += ["--disable-radix-cache"]
|
other_args += ["--disable-radix-cache"]
|
||||||
|
|
||||||
model = DEFAULT_MODEL_NAME_FOR_TEST
|
model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
base_url = DEFAULT_URL_FOR_TEST
|
base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
process = popen_launch_server(
|
process = popen_launch_server(
|
||||||
model,
|
model,
|
||||||
base_url,
|
base_url,
|
||||||
|
|||||||
@@ -4,15 +4,14 @@ import openai
|
|||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "intfloat/e5-mistral-7b-instruct"
|
cls.model = "intfloat/e5-mistral-7b-instruct"
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
||||||
|
|||||||
@@ -5,17 +5,17 @@ from sglang.srt.utils import kill_child_process
|
|||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_ACCURACY_TEST,
|
||||||
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalAccuracyLarge(unittest.TestCase):
|
class TestEvalAccuracyLarge(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:7157"
|
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.base_url,
|
cls.base_url,
|
||||||
@@ -49,7 +49,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
metrics = run_eval(args)
|
metrics = run_eval(args)
|
||||||
assert metrics["score"] >= 0.65, f"{metrics}"
|
assert metrics["score"] >= 0.64, f"{metrics}"
|
||||||
|
|
||||||
def test_mgsm_en(self):
|
def test_mgsm_en(self):
|
||||||
args = SimpleNamespace(
|
args = SimpleNamespace(
|
||||||
@@ -61,7 +61,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
metrics = run_eval(args)
|
metrics = run_eval(args)
|
||||||
assert metrics["score"] >= 0.85, f"{metrics}"
|
assert metrics["score"] >= 0.84, f"{metrics}"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -5,17 +5,17 @@ from sglang.srt.utils import kill_child_process
|
|||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_ACCURACY_TEST,
|
||||||
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
|
class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:7157"
|
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.base_url,
|
cls.base_url,
|
||||||
@@ -49,7 +49,7 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
metrics = run_eval(args)
|
metrics = run_eval(args)
|
||||||
assert metrics["score"] >= 0.65, f"{metrics}"
|
assert metrics["score"] >= 0.64, f"{metrics}"
|
||||||
|
|
||||||
def test_mgsm_en(self):
|
def test_mgsm_en(self):
|
||||||
args = SimpleNamespace(
|
args = SimpleNamespace(
|
||||||
@@ -61,7 +61,7 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
metrics = run_eval(args)
|
metrics = run_eval(args)
|
||||||
assert metrics["score"] >= 0.85, f"{metrics}"
|
assert metrics["score"] >= 0.84, f"{metrics}"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -5,17 +5,16 @@ from sglang.srt.utils import kill_child_process
|
|||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalAccuracyMini(unittest.TestCase):
|
class TestEvalAccuracyMini(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -10,17 +10,16 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
|
|||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
|
|||||||
112
test/srt/test_moe_serving_throughput.py
Normal file
112
test/srt/test_moe_serving_throughput.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from sglang.bench_serving import run_benchmark
|
||||||
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
from sglang.srt.utils import kill_child_process
|
||||||
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_MOE_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestServingThroughput(unittest.TestCase):
    """Serving-throughput benchmarks against the default MoE test model.

    Each test launches a server for ``DEFAULT_MOE_MODEL_NAME_FOR_TEST`` at
    ``DEFAULT_URL_FOR_MOE_TEST``, drives it with ``run_benchmark`` and shuts
    the server down again.
    """

    def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size):
        """Launch a server with the given options and benchmark it.

        Args:
            disable_radix_cache: If truthy, pass ``--disable-radix-cache``.
            disable_flashinfer: If truthy, pass ``--disable-flashinfer``.
            chunked_prefill_size: Value forwarded to ``--chunked-prefill-size``.

        Returns:
            The result mapping returned by ``run_benchmark``.

        Raises:
            AssertionError: If the number of completed requests differs from
                the number of prompts sent.
        """
        # Launch the server
        other_args = []
        if disable_radix_cache:
            other_args.append("--disable-radix-cache")
        if disable_flashinfer:
            other_args.append("--disable-flashinfer")
        other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
        other_args.extend(["--tensor-parallel-size", "2"])
        other_args.append("--enable-p2p-check")

        model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
        base_url = DEFAULT_URL_FOR_MOE_TEST
        process = popen_launch_server(
            model, base_url, timeout=300, other_args=other_args
        )

        # Run benchmark
        num_prompts = 400
        args = SimpleNamespace(
            backend="sglang",
            base_url=base_url,
            host=None,
            port=None,
            dataset_name="random",
            dataset_path="",
            model=None,
            tokenizer=None,
            num_prompts=num_prompts,
            sharegpt_output_len=None,
            random_input_len=4096,
            random_output_len=2048,
            random_range_ratio=0.0,
            request_rate=float("inf"),
            multi=None,
            seed=0,
            output_file=None,
            disable_tqdm=False,
            disable_stream=False,
            disable_ignore_eos=False,
            extra_request_body=None,
        )

        try:
            res = run_benchmark(args)
        finally:
            # Always tear the server down, even when the benchmark raises.
            kill_child_process(process.pid)

        assert res["completed"] == num_prompts
        return res

    def test_default(self):
        """Benchmark with the ``ServerArgs`` default options."""
        res = self.run_test(
            disable_radix_cache=ServerArgs.disable_radix_cache,
            disable_flashinfer=ServerArgs.disable_flashinfer,
            chunked_prefill_size=ServerArgs.chunked_prefill_size,
        )

        # The throughput floor is only enforced when SGLANG_IS_IN_CI is "true".
        if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
            # A100 (PCIE) performance
            assert res["output_throughput"] > 950

    def test_default_without_radix_cache(self):
        """Benchmark with the radix cache disabled, other options default."""
        res = self.run_test(
            disable_radix_cache=True,
            disable_flashinfer=ServerArgs.disable_flashinfer,
            chunked_prefill_size=ServerArgs.chunked_prefill_size,
        )

        if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
            # A100 (PCIE) performance
            assert res["output_throughput"] > 950

    def test_default_with_chunked_prefill(self):
        """Benchmark with a chunked prefill size of 8192.

        No throughput floor is asserted; the measured value is only printed.
        """
        res = self.run_test(
            disable_radix_cache=ServerArgs.disable_radix_cache,
            disable_flashinfer=ServerArgs.disable_flashinfer,
            chunked_prefill_size=8192,
        )

        if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
            # A100 (PCIE) performance
            print(res["output_throughput"])

    def test_all_cases(self):
        """Run the benchmark for every combination of the server options."""
        for disable_radix_cache in [False, True]:
            for disable_flashinfer in [False, True]:
                for chunked_prefill_size in [-1, 2048]:
                    # Forward the loop variables; hard-coded values here would
                    # re-run one configuration eight times.
                    self.run_test(
                        disable_radix_cache=disable_radix_cache,
                        disable_flashinfer=disable_flashinfer,
                        chunked_prefill_size=chunked_prefill_size,
                    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -8,17 +8,16 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
|
|||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
||||||
|
|||||||
@@ -5,11 +5,14 @@ from types import SimpleNamespace
|
|||||||
from sglang.bench_serving import run_benchmark
|
from sglang.bench_serving import run_benchmark
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_E2E_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestServingThroughput(unittest.TestCase):
|
class TestServingThroughput(unittest.TestCase):
|
||||||
|
|
||||||
def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size):
|
def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size):
|
||||||
# Launch the server
|
# Launch the server
|
||||||
other_args = []
|
other_args = []
|
||||||
@@ -20,7 +23,7 @@ class TestServingThroughput(unittest.TestCase):
|
|||||||
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
|
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
|
||||||
|
|
||||||
model = DEFAULT_MODEL_NAME_FOR_TEST
|
model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
base_url = "http://127.0.0.1:9157"
|
base_url = DEFAULT_URL_FOR_E2E_TEST
|
||||||
process = popen_launch_server(
|
process = popen_launch_server(
|
||||||
model, base_url, timeout=300, other_args=other_args
|
model, base_url, timeout=300, other_args=other_args
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -6,17 +6,16 @@ import requests
|
|||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSkipTokenizerInit(unittest.TestCase):
|
class TestSkipTokenizerInit(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"]
|
cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -6,17 +6,16 @@ import requests
|
|||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSRTEndpoint(unittest.TestCase):
|
class TestSRTEndpoint(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -5,17 +5,16 @@ from sglang.srt.utils import kill_child_process
|
|||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestTorchCompile(unittest.TestCase):
|
class TestTorchCompile(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"]
|
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -5,17 +5,16 @@ from sglang.srt.utils import kill_child_process
|
|||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestTritonAttnBackend(unittest.TestCase):
|
class TestTritonAttnBackend(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, other_args=["--disable-flashinfer"]
|
cls.model, cls.base_url, timeout=300, other_args=["--disable-flashinfer"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -5,15 +5,14 @@ import openai
|
|||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIVisionServer(unittest.TestCase):
|
class TestOpenAIVisionServer(unittest.TestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "liuhaotian/llava-v1.6-vicuna-7b"
|
cls.model = "liuhaotian/llava-v1.6-vicuna-7b"
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
|
|||||||
Reference in New Issue
Block a user