Clean up unit tests (#1020)
This commit is contained in:
10
.github/workflows/unit-test.yml
vendored
10
.github/workflows/unit-test.yml
vendored
@@ -37,12 +37,12 @@ jobs:
|
||||
pip install accelerate
|
||||
pip install sentence_transformers
|
||||
|
||||
- name: Test Frontend Language
|
||||
run: |
|
||||
cd test/lang
|
||||
python3 run_suite.py --suite minimal
|
||||
|
||||
- name: Test Backend Runtime
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite minimal
|
||||
|
||||
- name: Test Frontend Language
|
||||
run: |
|
||||
cd test/lang
|
||||
python3 run_suite.py --suite minimal
|
||||
|
||||
24
README.md
24
README.md
@@ -167,17 +167,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
||||
- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
|
||||
- To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
|
||||
- To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
|
||||
|
||||
### Use Models From ModelScope
|
||||
To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable `SGLANG_USE_MODELSCOPE`.
|
||||
```
|
||||
export SGLANG_USE_MODELSCOPE=true
|
||||
```
|
||||
Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server
|
||||
```
|
||||
SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
|
||||
```
|
||||
|
||||
|
||||
### Supported Models
|
||||
|
||||
- Llama / Llama 2 / Llama 3 / Llama 3.1
|
||||
@@ -203,7 +193,17 @@ SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen
|
||||
|
||||
Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
|
||||
|
||||
### Run Llama 3.1 405B
|
||||
#### Use Models From ModelScope
|
||||
To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable `SGLANG_USE_MODELSCOPE`.
|
||||
```
|
||||
export SGLANG_USE_MODELSCOPE=true
|
||||
```
|
||||
Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server
|
||||
```
|
||||
SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
|
||||
```
|
||||
|
||||
#### Run Llama 3.1 405B
|
||||
|
||||
```bash
|
||||
## Run 405B (fp8) on a single node
|
||||
|
||||
@@ -6,6 +6,9 @@ Use these commands to format your code and pass CI linting tests.
|
||||
```
|
||||
pip3 install pre-commit
|
||||
cd sglang
|
||||
pre-commit install .
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
## Add Unit Tests
|
||||
Add unit tests under [sglang/test](../../test). You can learn how to add and run tests from the README.md in that folder.
|
||||
|
||||
@@ -461,8 +461,11 @@ class ModelTpServer:
|
||||
next_token_ids = next_token_ids.tolist()
|
||||
else:
|
||||
if self.tokenizer is None:
|
||||
for i, req in enumerate(batch.reqs):
|
||||
next_token_ids.extend(req.sampling_params.stop_token_ids)
|
||||
next_token_ids = []
|
||||
for req in batch.reqs:
|
||||
next_token_ids.append(
|
||||
next(iter(req.sampling_params.stop_token_ids))
|
||||
)
|
||||
else:
|
||||
next_token_ids = [self.tokenizer.eos_token_id] * len(batch.reqs)
|
||||
|
||||
|
||||
@@ -149,7 +149,7 @@ def test_decode_json():
|
||||
assert isinstance(js_obj["population"], int)
|
||||
|
||||
|
||||
def test_expert_answer():
|
||||
def test_expert_answer(check_answer=True):
|
||||
@sgl.function
|
||||
def expert_answer(s, question):
|
||||
s += "Question: " + question + "\n"
|
||||
@@ -167,7 +167,9 @@ def test_expert_answer():
|
||||
)
|
||||
|
||||
ret = expert_answer.run(question="What is the capital of France?", temperature=0.1)
|
||||
assert "paris" in ret.text().lower()
|
||||
|
||||
if check_answer:
|
||||
assert "paris" in ret.text().lower(), f"Answer: {ret.text()}"
|
||||
|
||||
|
||||
def test_tool_use():
|
||||
|
||||
@@ -1,26 +1,32 @@
|
||||
# Run Unit Tests
|
||||
|
||||
## Test Frontend Language
|
||||
SGLang uses the built-in library [unittest](https://docs.python.org/3/library/unittest.html) as the testing framework.
|
||||
|
||||
## Test Backend Runtime
|
||||
```bash
|
||||
cd sglang/test/srt
|
||||
|
||||
# Run a single file
|
||||
python3 test_srt_endpoint.py
|
||||
|
||||
# Run a single test
|
||||
python3 -m unittest test_srt_endpoint.TestSRTEndpoint.test_simple_decode
|
||||
|
||||
# Run a suite with multiple files
|
||||
python3 run_suite.py --suite minimal
|
||||
```
|
||||
|
||||
## Test Frontend Language
|
||||
```bash
|
||||
cd sglang/test/lang
|
||||
export OPENAI_API_KEY=sk-*****
|
||||
|
||||
# Run a single file
|
||||
python3 test_openai_backend.py
|
||||
|
||||
# Run a suite
|
||||
# Run a single test
|
||||
python3 -m unittest test_openai_backend.TestOpenAIBackend.test_few_shot_qa
|
||||
|
||||
# Run a suite with multiple files
|
||||
python3 run_suite.py --suite minimal
|
||||
```
|
||||
|
||||
## Test Backend Runtime
|
||||
```
|
||||
cd sglang/test/srt
|
||||
|
||||
# Run a single file
|
||||
python3 test_eval_accuracy.py
|
||||
|
||||
# Run a suite
|
||||
python3 run_suite.py --suite minimal
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -21,11 +21,4 @@ class TestAnthropicBackend(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# from sglang.global_config import global_config
|
||||
|
||||
# global_config.verbosity = 2
|
||||
# t = TestAnthropicBackend()
|
||||
# t.setUpClass()
|
||||
# t.test_mt_bench()
|
||||
unittest.main()
|
||||
|
||||
@@ -48,8 +48,4 @@ class TestBind(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestBind()
|
||||
# t.setUpClass()
|
||||
# t.test_cache()
|
||||
unittest.main()
|
||||
|
||||
@@ -87,9 +87,4 @@ class TestChoices(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestChoices()
|
||||
# t.test_token_length_normalized()
|
||||
# t.test_greedy_token_selection()
|
||||
# t.test_unconditional_likelihood_normalized()
|
||||
unittest.main()
|
||||
|
||||
@@ -21,4 +21,4 @@ class TestAnthropicBackend(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
unittest.main()
|
||||
|
||||
@@ -88,11 +88,4 @@ class TestOpenAIBackend(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# from sglang.global_config import global_config
|
||||
|
||||
# global_config.verbosity = 2
|
||||
# t = TestOpenAIBackend()
|
||||
# t.setUpClass()
|
||||
# t.test_stream()
|
||||
unittest.main()
|
||||
|
||||
@@ -61,12 +61,4 @@ class TestSRTBackend(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# from sglang.global_config import global_config
|
||||
|
||||
# global_config.verbosity = 2
|
||||
# t = TestSRTBackend()
|
||||
# t.setUpClass()
|
||||
# t.test_few_shot_qa()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
@@ -125,7 +125,4 @@ class TestTracing(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestTracing()
|
||||
# t.test_multi_function()
|
||||
unittest.main()
|
||||
|
||||
@@ -14,26 +14,22 @@ from sglang.test.test_programs import (
|
||||
|
||||
class TestVertexAIBackend(unittest.TestCase):
|
||||
backend = None
|
||||
chat_backend = None
|
||||
chat_vision_backend = None
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.backend = VertexAI("gemini-pro")
|
||||
cls.chat_backend = VertexAI("gemini-pro")
|
||||
cls.chat_vision_backend = VertexAI("gemini-pro-vision")
|
||||
cls.backend = VertexAI("gemini-1.5-pro-001")
|
||||
|
||||
def test_few_shot_qa(self):
|
||||
set_default_backend(self.backend)
|
||||
test_few_shot_qa()
|
||||
|
||||
def test_mt_bench(self):
|
||||
set_default_backend(self.chat_backend)
|
||||
set_default_backend(self.backend)
|
||||
test_mt_bench()
|
||||
|
||||
def test_expert_answer(self):
|
||||
set_default_backend(self.backend)
|
||||
test_expert_answer()
|
||||
test_expert_answer(check_answer=False)
|
||||
|
||||
def test_parallel_decoding(self):
|
||||
set_default_backend(self.backend)
|
||||
@@ -44,7 +40,7 @@ class TestVertexAIBackend(unittest.TestCase):
|
||||
test_parallel_encoding()
|
||||
|
||||
def test_image_qa(self):
|
||||
set_default_backend(self.chat_vision_backend)
|
||||
set_default_backend(self.backend)
|
||||
test_image_qa()
|
||||
|
||||
def test_stream(self):
|
||||
@@ -53,11 +49,4 @@ class TestVertexAIBackend(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# from sglang.global_config import global_config
|
||||
|
||||
# global_config.verbosity = 2
|
||||
# t = TestVertexAIBackend()
|
||||
# t.setUpClass()
|
||||
# t.test_stream()
|
||||
unittest.main()
|
||||
|
||||
@@ -6,9 +6,9 @@ from sglang.test.test_utils import run_unittest_files
|
||||
suites = {
|
||||
"minimal": [
|
||||
"test_eval_accuracy.py",
|
||||
"test_embedding_openai_server.py",
|
||||
"test_openai_server.py",
|
||||
"test_vision_openai_server.py",
|
||||
"test_embedding_openai_server.py",
|
||||
"test_chunked_prefill.py",
|
||||
"test_torch_compile.py",
|
||||
"test_models_from_modelscope.py",
|
||||
|
||||
@@ -37,9 +37,4 @@ class TestAccuracy(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestAccuracy()
|
||||
# t.setUpClass()
|
||||
# t.test_mmlu()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import json
|
||||
import time
|
||||
import unittest
|
||||
|
||||
import openai
|
||||
|
||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||
from sglang.srt.openai_api.protocol import EmbeddingObject
|
||||
from sglang.srt.utils import kill_child_process
|
||||
from sglang.test.test_utils import popen_launch_server
|
||||
|
||||
@@ -65,12 +62,12 @@ class TestOpenAIServer(unittest.TestCase):
|
||||
), f"{response.usage.total_tokens} vs {num_prompt_tokens}"
|
||||
|
||||
def run_batch(self):
|
||||
# FIXME not implemented
|
||||
# FIXME: not implemented
|
||||
pass
|
||||
|
||||
def test_embedding(self):
|
||||
# TODO the fields of encoding_format, dimensions, user are skipped
|
||||
# TODO support use_list_input
|
||||
# TODO: the fields of encoding_format, dimensions, user are skipped
|
||||
# TODO: support use_list_input
|
||||
for use_list_input in [False, True]:
|
||||
for token_input in [False, True]:
|
||||
self.run_embedding(use_list_input, token_input)
|
||||
@@ -80,9 +77,4 @@ class TestOpenAIServer(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestOpenAIServer()
|
||||
# t.setUpClass()
|
||||
# t.test_embedding()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
@@ -32,9 +32,4 @@ class TestAccuracy(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestAccuracy()
|
||||
# t.setUpClass()
|
||||
# t.test_mmlu()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
@@ -44,4 +44,4 @@ class TestDownloadFromModelScope(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
unittest.main()
|
||||
|
||||
@@ -399,9 +399,4 @@ class TestOpenAIServer(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestOpenAIServer()
|
||||
# t.setUpClass()
|
||||
# t.test_completion()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
import requests
|
||||
|
||||
from sglang.srt.utils import kill_child_process
|
||||
from sglang.test.run_eval import run_eval
|
||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
||||
|
||||
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
|
||||
|
||||
class TestSRTEndpoint(unittest.TestCase):
|
||||
class TestSkipTokenizerInit(unittest.TestCase):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
@@ -26,9 +21,7 @@ class TestSRTEndpoint(unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
kill_child_process(cls.process.pid)
|
||||
|
||||
def run_decode(
|
||||
self, return_logprob=False, top_logprobs_num=0, return_text=False, n=1
|
||||
):
|
||||
def run_decode(self, return_logprob=False, top_logprobs_num=0, n=1):
|
||||
response = requests.post(
|
||||
self.base_url + "/generate",
|
||||
json={
|
||||
@@ -50,7 +43,6 @@ class TestSRTEndpoint(unittest.TestCase):
|
||||
"stream": False,
|
||||
"return_logprob": return_logprob,
|
||||
"top_logprobs_num": top_logprobs_num,
|
||||
"return_text_in_logprobs": return_text,
|
||||
"logprob_start_len": 0,
|
||||
},
|
||||
)
|
||||
@@ -65,13 +57,11 @@ class TestSRTEndpoint(unittest.TestCase):
|
||||
|
||||
def test_logprob(self):
|
||||
for top_logprobs_num in [0, 3]:
|
||||
for return_text in [False, False]:
|
||||
self.run_decode(
|
||||
return_logprob=True,
|
||||
top_logprobs_num=top_logprobs_num,
|
||||
return_text=return_text,
|
||||
)
|
||||
self.run_decode(
|
||||
return_logprob=True,
|
||||
top_logprobs_num=top_logprobs_num,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
unittest.main()
|
||||
@@ -4,7 +4,6 @@ import unittest
|
||||
import requests
|
||||
|
||||
from sglang.srt.utils import kill_child_process
|
||||
from sglang.test.run_eval import run_eval
|
||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
||||
|
||||
|
||||
@@ -59,4 +58,4 @@ class TestSRTEndpoint(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
unittest.main()
|
||||
|
||||
@@ -34,9 +34,4 @@ class TestAccuracy(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestAccuracy()
|
||||
# t.setUpClass()
|
||||
# t.test_mmlu()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
@@ -113,9 +113,4 @@ class TestOpenAIVisionServer(unittest.TestCase):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(warnings="ignore")
|
||||
|
||||
# t = TestOpenAIVisionServer()
|
||||
# t.setUpClass()
|
||||
# t.test_chat_completion()
|
||||
# t.tearDownClass()
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user