From 54fb1c80c0d7bbf100d4efc84d1aad4bee094ff0 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 10 Aug 2024 15:09:03 -0700 Subject: [PATCH] Clean up unit tests (#1020) --- .github/workflows/unit-test.yml | 10 +++--- README.md | 24 ++++++------- docs/en/contributor_guide.md | 5 ++- python/sglang/srt/managers/tp_worker.py | 7 ++-- python/sglang/test/test_programs.py | 6 ++-- test/README.md | 36 +++++++++++-------- test/lang/test_anthropic_backend.py | 9 +---- test/lang/test_bind_cache.py | 6 +--- test/lang/test_choices.py | 7 +--- test/lang/test_litellm_backend.py | 2 +- test/lang/test_openai_backend.py | 9 +---- test/lang/test_srt_backend.py | 10 +----- test/lang/test_tracing.py | 5 +-- test/lang/test_vertexai_backend.py | 21 +++-------- test/srt/run_suite.py | 2 +- test/srt/test_chunked_prefill.py | 7 +--- test/srt/test_embedding_openai_server.py | 16 +++------ test/srt/test_eval_accuracy.py | 7 +--- test/srt/test_models_from_modelscope.py | 2 +- test/srt/test_openai_server.py | 7 +--- ...zer_srt.py => test_skip_tokenizer_init.py} | 24 ++++--------- test/srt/test_srt_endpoint.py | 3 +- test/srt/test_torch_compile.py | 7 +--- test/srt/test_vision_openai_server.py | 7 +--- 24 files changed, 82 insertions(+), 157 deletions(-) rename test/srt/{test_skip_tokenizer_srt.py => test_skip_tokenizer_init.py} (73%) diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 8d4ddcdb7..f9b79dc67 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -37,12 +37,12 @@ jobs: pip install accelerate pip install sentence_transformers - - name: Test Frontend Language - run: | - cd test/lang - python3 run_suite.py --suite minimal - - name: Test Backend Runtime run: | cd test/srt python3 run_suite.py --suite minimal + + - name: Test Frontend Language + run: | + cd test/lang + python3 run_suite.py --suite minimal diff --git a/README.md b/README.md index 9be13509f..8cccd6a37 100644 --- a/README.md +++ b/README.md @@ -167,17 +167,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md). - To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments. - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes. - -### Use Models From ModelScope -To use model from [ModelScope](https://www.modelscope.cn), setting environment variable SGLANG_USE_MODELSCOPE. -``` -export SGLANG_USE_MODELSCOPE=true -``` -Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server -``` -SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000 -``` - + ### Supported Models - Llama / Llama 2 / Llama 3 / Llama 3.1 @@ -203,7 +193,17 @@ SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md). -### Run Llama 3.1 405B +#### Use Models From ModelScope +To use model from [ModelScope](https://www.modelscope.cn), setting environment variable SGLANG_USE_MODELSCOPE. +``` +export SGLANG_USE_MODELSCOPE=true +``` +Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server +``` +SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000 +``` + +#### Run Llama 3.1 405B ```bash ## Run 405B (fp8) on a single node diff --git a/docs/en/contributor_guide.md b/docs/en/contributor_guide.md index 7a87187c1..1ebdd0379 100644 --- a/docs/en/contributor_guide.md +++ b/docs/en/contributor_guide.md @@ -6,6 +6,9 @@ Use these commands to format your code and pass CI linting tests. ``` pip3 install pre-commit cd sglang -pre-commit install . +pre-commit install pre-commit run --all-files ``` + +## Add Unit Tests +Add unit tests under [sglang/test](../../test). You can learn how to add and run tests from the README.md in that folder. diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index c66897710..e425a3c37 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -461,8 +461,11 @@ class ModelTpServer: next_token_ids = next_token_ids.tolist() else: if self.tokenizer is None: - for i, req in enumerate(batch.reqs): - next_token_ids.extend(req.sampling_params.stop_token_ids) + next_token_ids = [] + for req in batch.reqs: + next_token_ids.append( + next(iter(req.sampling_params.stop_token_ids)) + ) else: next_token_ids = [self.tokenizer.eos_token_id] * len(batch.reqs) diff --git a/python/sglang/test/test_programs.py b/python/sglang/test/test_programs.py index 710871ba5..7c7c9bdcb 100644 --- a/python/sglang/test/test_programs.py +++ b/python/sglang/test/test_programs.py @@ -149,7 +149,7 @@ def test_decode_json(): assert isinstance(js_obj["population"], int) -def test_expert_answer(): +def test_expert_answer(check_answer=True): @sgl.function def expert_answer(s, question): s += "Question: " + question + "\n" @@ -167,7 +167,9 @@ def test_expert_answer(): ) ret = expert_answer.run(question="What is the capital of France?", temperature=0.1) - assert "paris" in ret.text().lower() + + if check_answer: + assert "paris" in ret.text().lower(), f"Answer: {ret.text()}" def test_tool_use(): diff --git a/test/README.md b/test/README.md index cdfbbaee8..b9cf63ff1 100644 --- a/test/README.md +++ b/test/README.md @@ -1,26 +1,32 @@ # Run Unit Tests -## Test Frontend Language +SGLang uses the built-in library [unittest](https://docs.python.org/3/library/unittest.html) as the testing framework. + +## Test Backend Runtime +```bash +cd sglang/test/srt + +# Run a single file +python3 test_srt_endpoint.py + +# Run a single test +python3 -m unittest test_srt_endpoint.TestSRTEndpoint.test_simple_decode + +# Run a suite with multiple files +python3 run_suite.py --suite minimal ``` + +## Test Frontend Language +```bash cd sglang/test/lang export OPENAI_API_KEY=sk-***** # Run a single file python3 test_openai_backend.py -# Run a suite +# Run a single test +python3 -m unittest test_openai_backend.TestOpenAIBackend.test_few_shot_qa + +# Run a suite with multiple files python3 run_suite.py --suite minimal ``` - -## Test Backend Runtime -``` -cd sglang/test/srt - -# Run a single file -python3 test_eval_accuracy.py - -# Run a suite -python3 run_suite.py --suite minimal -``` - - diff --git a/test/lang/test_anthropic_backend.py b/test/lang/test_anthropic_backend.py index 87b27a765..03911449d 100644 --- a/test/lang/test_anthropic_backend.py +++ b/test/lang/test_anthropic_backend.py @@ -21,11 +21,4 @@ class TestAnthropicBackend(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # from sglang.global_config import global_config - - # global_config.verbosity = 2 - # t = TestAnthropicBackend() - # t.setUpClass() - # t.test_mt_bench() + unittest.main() diff --git a/test/lang/test_bind_cache.py b/test/lang/test_bind_cache.py index 14a7e5098..5ed68ff45 100644 --- a/test/lang/test_bind_cache.py +++ b/test/lang/test_bind_cache.py @@ -48,8 +48,4 @@ class TestBind(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestBind() - # t.setUpClass() - # t.test_cache() + unittest.main() diff --git a/test/lang/test_choices.py b/test/lang/test_choices.py index da25e9e49..88cd22dfb 100644 --- a/test/lang/test_choices.py +++ b/test/lang/test_choices.py @@ -87,9 +87,4 @@ class TestChoices(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestChoices() - # t.test_token_length_normalized() - # t.test_greedy_token_selection() - # t.test_unconditional_likelihood_normalized() + unittest.main() diff --git a/test/lang/test_litellm_backend.py b/test/lang/test_litellm_backend.py index 3c7f5db21..649e2e4d3 100644 --- a/test/lang/test_litellm_backend.py +++ b/test/lang/test_litellm_backend.py @@ -21,4 +21,4 @@ class TestAnthropicBackend(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") + unittest.main() diff --git a/test/lang/test_openai_backend.py b/test/lang/test_openai_backend.py index b1bb47b82..220784ab3 100644 --- a/test/lang/test_openai_backend.py +++ b/test/lang/test_openai_backend.py @@ -88,11 +88,4 @@ class TestOpenAIBackend(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # from sglang.global_config import global_config - - # global_config.verbosity = 2 - # t = TestOpenAIBackend() - # t.setUpClass() - # t.test_stream() + unittest.main() diff --git a/test/lang/test_srt_backend.py b/test/lang/test_srt_backend.py index 778cde8be..b2a07ae36 100644 --- a/test/lang/test_srt_backend.py +++ b/test/lang/test_srt_backend.py @@ -61,12 +61,4 @@ class TestSRTBackend(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # from sglang.global_config import global_config - - # global_config.verbosity = 2 - # t = TestSRTBackend() - # t.setUpClass() - # t.test_few_shot_qa() - # t.tearDownClass() + unittest.main() diff --git a/test/lang/test_tracing.py b/test/lang/test_tracing.py index 5f2bc1d04..7c3af071b 100644 --- a/test/lang/test_tracing.py +++ b/test/lang/test_tracing.py @@ -125,7 +125,4 @@ class TestTracing(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestTracing() - # t.test_multi_function() + unittest.main() diff --git a/test/lang/test_vertexai_backend.py b/test/lang/test_vertexai_backend.py index b29efaa75..da229854e 100644 --- a/test/lang/test_vertexai_backend.py +++ b/test/lang/test_vertexai_backend.py @@ -14,26 +14,22 @@ from sglang.test.test_programs import ( class TestVertexAIBackend(unittest.TestCase): backend = None - chat_backend = None - chat_vision_backend = None @classmethod def setUpClass(cls): - cls.backend = VertexAI("gemini-pro") - cls.chat_backend = VertexAI("gemini-pro") - cls.chat_vision_backend = VertexAI("gemini-pro-vision") + cls.backend = VertexAI("gemini-1.5-pro-001") def test_few_shot_qa(self): set_default_backend(self.backend) test_few_shot_qa() def test_mt_bench(self): - set_default_backend(self.chat_backend) + set_default_backend(self.backend) test_mt_bench() def test_expert_answer(self): set_default_backend(self.backend) - test_expert_answer() + test_expert_answer(check_answer=False) def test_parallel_decoding(self): set_default_backend(self.backend) @@ -44,7 +40,7 @@ class TestVertexAIBackend(unittest.TestCase): test_parallel_encoding() def test_image_qa(self): - set_default_backend(self.chat_vision_backend) + set_default_backend(self.backend) test_image_qa() def test_stream(self): @@ -53,11 +49,4 @@ class TestVertexAIBackend(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # from sglang.global_config import global_config - - # global_config.verbosity = 2 - # t = TestVertexAIBackend() - # t.setUpClass() - # t.test_stream() + unittest.main() diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 2bc37b682..288645c21 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -6,9 +6,9 @@ from sglang.test.test_utils import run_unittest_files suites = { "minimal": [ "test_eval_accuracy.py", - "test_embedding_openai_server.py", "test_openai_server.py", "test_vision_openai_server.py", + "test_embedding_openai_server.py", "test_chunked_prefill.py", "test_torch_compile.py", "test_models_from_modelscope.py", diff --git a/test/srt/test_chunked_prefill.py b/test/srt/test_chunked_prefill.py index 7f274926a..271b73fab 100644 --- a/test/srt/test_chunked_prefill.py +++ b/test/srt/test_chunked_prefill.py @@ -37,9 +37,4 @@ class TestAccuracy(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestAccuracy() - # t.setUpClass() - # t.test_mmlu() - # t.tearDownClass() + unittest.main() diff --git a/test/srt/test_embedding_openai_server.py b/test/srt/test_embedding_openai_server.py index 72dc7a009..ed7db6643 100644 --- a/test/srt/test_embedding_openai_server.py +++ b/test/srt/test_embedding_openai_server.py @@ -1,11 +1,8 @@ -import json -import time import unittest import openai from sglang.srt.hf_transformers_utils import get_tokenizer -from sglang.srt.openai_api.protocol import EmbeddingObject from sglang.srt.utils import kill_child_process from sglang.test.test_utils import popen_launch_server @@ -65,12 +62,12 @@ class TestOpenAIServer(unittest.TestCase): ), f"{response.usage.total_tokens} vs {num_prompt_tokens}" def run_batch(self): - # FIXME not implemented + # FIXME: not implemented pass def test_embedding(self): - # TODO the fields of encoding_format, dimensions, user are skipped - # TODO support use_list_input + # TODO: the fields of encoding_format, dimensions, user are skipped + # TODO: support use_list_input for use_list_input in [False, True]: for token_input in [False, True]: self.run_embedding(use_list_input, token_input) @@ -80,9 +77,4 @@ class TestOpenAIServer(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestOpenAIServer() - # t.setUpClass() - # t.test_embedding() - # t.tearDownClass() + unittest.main() diff --git a/test/srt/test_eval_accuracy.py b/test/srt/test_eval_accuracy.py index b63593626..da9a4f9c6 100644 --- a/test/srt/test_eval_accuracy.py +++ b/test/srt/test_eval_accuracy.py @@ -32,9 +32,4 @@ class TestAccuracy(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestAccuracy() - # t.setUpClass() - # t.test_mmlu() - # t.tearDownClass() + unittest.main() diff --git a/test/srt/test_models_from_modelscope.py b/test/srt/test_models_from_modelscope.py index 2313053b9..76853c2a6 100644 --- a/test/srt/test_models_from_modelscope.py +++ b/test/srt/test_models_from_modelscope.py @@ -44,4 +44,4 @@ class TestDownloadFromModelScope(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") + unittest.main() diff --git a/test/srt/test_openai_server.py b/test/srt/test_openai_server.py index f8f6ca632..95486d70e 100644 --- a/test/srt/test_openai_server.py +++ b/test/srt/test_openai_server.py @@ -399,9 +399,4 @@ class TestOpenAIServer(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestOpenAIServer() - # t.setUpClass() - # t.test_completion() - # t.tearDownClass() + unittest.main() diff --git a/test/srt/test_skip_tokenizer_srt.py b/test/srt/test_skip_tokenizer_init.py similarity index 73% rename from test/srt/test_skip_tokenizer_srt.py rename to test/srt/test_skip_tokenizer_init.py index 7f0a1fe1a..7417783f6 100644 --- a/test/srt/test_skip_tokenizer_srt.py +++ b/test/srt/test_skip_tokenizer_init.py @@ -1,18 +1,13 @@ import json -import os -import sys import unittest import requests from sglang.srt.utils import kill_child_process -from sglang.test.run_eval import run_eval from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server -# os.environ["CUDA_VISIBLE_DEVICES"] = "1" - -class TestSRTEndpoint(unittest.TestCase): +class TestSkipTokenizerInit(unittest.TestCase): @classmethod def setUpClass(cls): @@ -26,9 +21,7 @@ class TestSRTEndpoint(unittest.TestCase): def tearDownClass(cls): kill_child_process(cls.process.pid) - def run_decode( - self, return_logprob=False, top_logprobs_num=0, return_text=False, n=1 - ): + def run_decode(self, return_logprob=False, top_logprobs_num=0, n=1): response = requests.post( self.base_url + "/generate", json={ @@ -50,7 +43,6 @@ class TestSRTEndpoint(unittest.TestCase): "stream": False, "return_logprob": return_logprob, "top_logprobs_num": top_logprobs_num, - "return_text_in_logprobs": return_text, "logprob_start_len": 0, }, ) @@ -65,13 +57,11 @@ class TestSRTEndpoint(unittest.TestCase): def test_logprob(self): for top_logprobs_num in [0, 3]: - for return_text in [False, False]: - self.run_decode( - return_logprob=True, - top_logprobs_num=top_logprobs_num, - return_text=return_text, - ) + self.run_decode( + return_logprob=True, + top_logprobs_num=top_logprobs_num, + ) if __name__ == "__main__": - unittest.main(warnings="ignore") + unittest.main() diff --git a/test/srt/test_srt_endpoint.py b/test/srt/test_srt_endpoint.py index b208dfa13..8948e22d7 100644 --- a/test/srt/test_srt_endpoint.py +++ b/test/srt/test_srt_endpoint.py @@ -4,7 +4,6 @@ import unittest import requests from sglang.srt.utils import kill_child_process -from sglang.test.run_eval import run_eval from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server @@ -59,4 +58,4 @@ class TestSRTEndpoint(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") + unittest.main() diff --git a/test/srt/test_torch_compile.py b/test/srt/test_torch_compile.py index fd2c6ebb7..7b4664563 100644 --- a/test/srt/test_torch_compile.py +++ b/test/srt/test_torch_compile.py @@ -34,9 +34,4 @@ class TestAccuracy(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestAccuracy() - # t.setUpClass() - # t.test_mmlu() - # t.tearDownClass() + unittest.main() diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index 982c026db..52764b6b4 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -113,9 +113,4 @@ class TestOpenAIVisionServer(unittest.TestCase): if __name__ == "__main__": - unittest.main(warnings="ignore") - - # t = TestOpenAIVisionServer() - # t.setUpClass() - # t.test_chat_completion() - # t.tearDownClass() + unittest.main()