Improve end-to-end throughput test and its coverage (#1039)
This commit is contained in:
27
.github/workflows/e2e-test.yml
vendored
27
.github/workflows/e2e-test.yml
vendored
@@ -37,23 +37,16 @@ jobs:
|
|||||||
|
|
||||||
- name: Benchmark Serving Throughput
|
- name: Benchmark Serving Throughput
|
||||||
run: |
|
run: |
|
||||||
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
|
cd test/srt
|
||||||
SERVER_PID=$!
|
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
|
||||||
|
|
||||||
echo "Waiting for server to start..."
|
- name: Benchmark Serving Throughput (w/o RadixAttention)
|
||||||
for i in {1..120}; do
|
run: |
|
||||||
if curl -s http://127.0.0.1:8413/health; then
|
cd test/srt
|
||||||
echo "Server is up!"
|
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
||||||
break
|
|
||||||
fi
|
|
||||||
if [ $i -eq 120 ]; then
|
|
||||||
echo "Server failed to start within 120 seconds"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 500 --random-input 4096 --random-output 2048
|
- name: Benchmark Serving Throughput (w/o FlashInfer)
|
||||||
|
run: |
|
||||||
|
cd test/srt
|
||||||
|
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_flashinfer
|
||||||
|
|
||||||
echo "Stopping server..."
|
|
||||||
kill -9 $SERVER_PID
|
|
||||||
|
|||||||
@@ -39,6 +39,8 @@ from transformers import (
|
|||||||
|
|
||||||
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
|
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
|
||||||
|
|
||||||
|
global args
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class RequestFuncInput:
|
class RequestFuncInput:
|
||||||
@@ -749,7 +751,11 @@ def check_chat_template(model_path):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def fire(args: argparse.Namespace):
|
def run_benchmark(args_: argparse.Namespace):
|
||||||
|
global args
|
||||||
|
args = args_
|
||||||
|
|
||||||
|
set_ulimit()
|
||||||
random.seed(args.seed)
|
random.seed(args.seed)
|
||||||
np.random.seed(args.seed)
|
np.random.seed(args.seed)
|
||||||
|
|
||||||
@@ -853,7 +859,7 @@ def fire(args: argparse.Namespace):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
asyncio.run(
|
return asyncio.run(
|
||||||
benchmark(
|
benchmark(
|
||||||
backend=backend,
|
backend=backend,
|
||||||
api_url=api_url,
|
api_url=api_url,
|
||||||
@@ -962,11 +968,6 @@ if __name__ == "__main__":
|
|||||||
"Otherwise, we use Poisson process to synthesize the request arrival times. Default is 128.0.",
|
"Otherwise, we use Poisson process to synthesize the request arrival times. Default is 128.0.",
|
||||||
)
|
)
|
||||||
parser.add_argument("--seed", type=int, default=0, help="Default is 0.")
|
parser.add_argument("--seed", type=int, default=0, help="Default is 0.")
|
||||||
parser.add_argument(
|
|
||||||
"--disable-tqdm",
|
|
||||||
action="store_true",
|
|
||||||
help="Specify to disable tqdm progress bar.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--multi",
|
"--multi",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -979,6 +980,11 @@ if __name__ == "__main__":
|
|||||||
help="Range of request rates in the format start,stop,step. Default is 2,34,2. It also supports a list of request rates, requiring the parameters to not equal three.",
|
help="Range of request rates in the format start,stop,step. Default is 2,34,2. It also supports a list of request rates, requiring the parameters to not equal three.",
|
||||||
)
|
)
|
||||||
parser.add_argument("--output-file", type=str, help="Output JSONL file name.")
|
parser.add_argument("--output-file", type=str, help="Output JSONL file name.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--disable-tqdm",
|
||||||
|
action="store_true",
|
||||||
|
help="Specify to disable tqdm progress bar.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--disable-stream",
|
"--disable-stream",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -996,8 +1002,5 @@ if __name__ == "__main__":
|
|||||||
help="Append given JSON object to the request payload. You can use this to specify"
|
help="Append given JSON object to the request payload. You can use this to specify"
|
||||||
"additional generate params like sampling params.",
|
"additional generate params like sampling params.",
|
||||||
)
|
)
|
||||||
|
|
||||||
set_ulimit()
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
fire(args)
|
run_benchmark(args)
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
|||||||
from sglang.utils import get_exception_traceback
|
from sglang.utils import get_exception_traceback
|
||||||
|
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
||||||
|
DEFAULT_URL_FOR_TEST = "http://127.0.0.1:8157"
|
||||||
|
|
||||||
|
|
||||||
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
|
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
|
||||||
|
|||||||
@@ -3,7 +3,11 @@ from types import SimpleNamespace
|
|||||||
|
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestAccuracy(unittest.TestCase):
|
class TestAccuracy(unittest.TestCase):
|
||||||
@@ -11,7 +15,7 @@ class TestAccuracy(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.base_url,
|
cls.base_url,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import openai
|
|||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import popen_launch_server
|
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, popen_launch_server
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(unittest.TestCase):
|
||||||
@@ -12,7 +12,7 @@ class TestOpenAIServer(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "intfloat/e5-mistral-7b-instruct"
|
cls.model = "intfloat/e5-mistral-7b-instruct"
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
||||||
|
|||||||
@@ -3,7 +3,11 @@ from types import SimpleNamespace
|
|||||||
|
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestAccuracy(unittest.TestCase):
|
class TestAccuracy(unittest.TestCase):
|
||||||
@@ -11,7 +15,7 @@ class TestAccuracy(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -8,7 +8,11 @@ import openai
|
|||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(unittest.TestCase):
|
||||||
@@ -16,7 +20,7 @@ class TestOpenAIServer(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
|
|||||||
@@ -6,7 +6,11 @@ import openai
|
|||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(unittest.TestCase):
|
||||||
@@ -14,7 +18,7 @@ class TestOpenAIServer(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
||||||
|
|||||||
92
test/srt/test_serving_throughput.py
Normal file
92
test/srt/test_serving_throughput.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
import unittest
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from sglang.bench_serving import run_benchmark
|
||||||
|
from sglang.srt.utils import kill_child_process
|
||||||
|
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
||||||
|
|
||||||
|
|
||||||
|
class TestServingThroughput(unittest.TestCase):
|
||||||
|
|
||||||
|
def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size):
|
||||||
|
# Launch the server
|
||||||
|
other_args = []
|
||||||
|
if disable_radix_cache:
|
||||||
|
other_args.append("--disable-radix-cache")
|
||||||
|
if disable_flashinfer:
|
||||||
|
other_args.append("--disable-flashinfer")
|
||||||
|
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
|
||||||
|
|
||||||
|
model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
base_url = "http://127.0.0.1:9157"
|
||||||
|
process = popen_launch_server(
|
||||||
|
model, base_url, timeout=300, other_args=other_args
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run benchmark
|
||||||
|
num_prompts = 400
|
||||||
|
args = SimpleNamespace(
|
||||||
|
backend="sglang",
|
||||||
|
base_url=base_url,
|
||||||
|
host=None,
|
||||||
|
port=None,
|
||||||
|
dataset_name="random",
|
||||||
|
dataset_path="",
|
||||||
|
model=None,
|
||||||
|
tokenizer=None,
|
||||||
|
num_prompts=num_prompts,
|
||||||
|
sharegpt_output_len=None,
|
||||||
|
random_input_len=4096,
|
||||||
|
random_output_len=2048,
|
||||||
|
random_range_ratio=0.0,
|
||||||
|
request_rate=float("inf"),
|
||||||
|
multi=None,
|
||||||
|
seed=0,
|
||||||
|
output_file=None,
|
||||||
|
disable_tqdm=False,
|
||||||
|
disable_stream=False,
|
||||||
|
disable_ignore_eos=False,
|
||||||
|
extra_request_body=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
res = run_benchmark(args)
|
||||||
|
finally:
|
||||||
|
kill_child_process(process.pid)
|
||||||
|
|
||||||
|
assert res["completed"] == num_prompts
|
||||||
|
|
||||||
|
def test_default(self):
|
||||||
|
self.run_test(
|
||||||
|
disable_radix_cache=False,
|
||||||
|
disable_flashinfer=False,
|
||||||
|
chunked_prefill_size=-1,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_default_without_radix_cache(self):
|
||||||
|
self.run_test(
|
||||||
|
disable_radix_cache=True,
|
||||||
|
disable_flashinfer=False,
|
||||||
|
chunked_prefill_size=-1,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_default_without_flashinfer(self):
|
||||||
|
self.run_test(
|
||||||
|
disable_radix_cache=False,
|
||||||
|
disable_flashinfer=True,
|
||||||
|
chunked_prefill_size=-1,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_all_cases(self):
|
||||||
|
for disable_radix_cache in [False, True]:
|
||||||
|
for disable_flashinfer in [False, True]:
|
||||||
|
for chunked_prefill_size in [-1, 2048]:
|
||||||
|
self.run_test(
|
||||||
|
disable_radix_cache=False,
|
||||||
|
disable_flashinfer=False,
|
||||||
|
chunked_prefill_size=-1,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -4,7 +4,11 @@ import unittest
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSkipTokenizerInit(unittest.TestCase):
|
class TestSkipTokenizerInit(unittest.TestCase):
|
||||||
@@ -12,7 +16,7 @@ class TestSkipTokenizerInit(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"]
|
cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,7 +4,11 @@ import unittest
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSRTEndpoint(unittest.TestCase):
|
class TestSRTEndpoint(unittest.TestCase):
|
||||||
@@ -12,7 +16,7 @@ class TestSRTEndpoint(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
61
test/srt/test_throughput.py
Normal file
61
test/srt/test_throughput.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import json
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from sglang.srt.utils import kill_child_process
|
||||||
|
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
||||||
|
|
||||||
|
|
||||||
|
class TestSRTEndpoint(unittest.TestCase):
    """Smoke tests that POST to the server's ``/generate`` endpoint."""

    @classmethod
    def setUpClass(cls):
        """Launch one server process shared by every test in the class."""
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
        cls.base_url = "http://127.0.0.1:8157"
        cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)

    @classmethod
    def tearDownClass(cls):
        """Terminate the server process started in ``setUpClass``."""
        kill_child_process(cls.process.pid)

    def run_decode(
        self, return_logprob=False, top_logprobs_num=0, return_text=False, n=1
    ):
        """Send one non-streaming generate request and print the JSON reply.

        Args:
            return_logprob: Sent as the request's ``return_logprob`` field.
            top_logprobs_num: Sent as the request's ``top_logprobs_num`` field.
            return_text: Sent as ``return_text_in_logprobs``.
            n: Number of samples requested; temperature is 0 for a single
                sample and 0.5 otherwise.

        Raises:
            AssertionError: If the server does not answer with HTTP 200.
        """
        response = requests.post(
            self.base_url + "/generate",
            json={
                "text": "The capital of France is",
                "sampling_params": {
                    "temperature": 0 if n == 1 else 0.5,
                    "max_new_tokens": 32,
                    "n": n,
                },
                "stream": False,
                "return_logprob": return_logprob,
                "top_logprobs_num": top_logprobs_num,
                "return_text_in_logprobs": return_text,
                "logprob_start_len": 0,
            },
        )
        # Without this check an error reply with a JSON body would be
        # printed and the test would still pass.
        self.assertEqual(response.status_code, 200, response.text)
        print(json.dumps(response.json()))
        print("=" * 100)

    def test_simple_decode(self):
        """Single-sample request with default options."""
        self.run_decode()

    def test_parallel_sample(self):
        """Request three samples for the same prompt."""
        self.run_decode(n=3)

    def test_logprob(self):
        """Request logprobs with and without top-k entries and text."""
        for top_logprobs_num in [0, 3]:
            for return_text in [True, False]:
                self.run_decode(
                    return_logprob=True,
                    top_logprobs_num=top_logprobs_num,
                    return_text=return_text,
                )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -3,7 +3,11 @@ from types import SimpleNamespace
|
|||||||
|
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestAccuracy(unittest.TestCase):
|
class TestAccuracy(unittest.TestCase):
|
||||||
@@ -11,7 +15,7 @@ class TestAccuracy(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"]
|
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import openai
|
|||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import popen_launch_server
|
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, popen_launch_server
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIVisionServer(unittest.TestCase):
|
class TestOpenAIVisionServer(unittest.TestCase):
|
||||||
@@ -13,7 +13,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "liuhaotian/llava-v1.6-vicuna-7b"
|
cls.model = "liuhaotian/llava-v1.6-vicuna-7b"
|
||||||
cls.base_url = "http://127.0.0.1:8157"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
|
|||||||
Reference in New Issue
Block a user