# sglang/test/srt/test_chunked_prefill.py

"""
python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_prefill_without_radix_cache
"""

import unittest

from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    run_bench_serving,
    run_mmlu_test,
    run_mulit_request_test,
)


class TestChunkedPrefill(unittest.TestCase):
    def test_chunked_prefill(self):
        run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)

    def test_mixed_chunked_prefill(self):
        run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=True)

    def test_chunked_prefill_without_radix_cache(self):
        run_mmlu_test(disable_radix_cache=True, enable_mixed_chunk=False)

    def test_mixed_chunked_prefill_without_radix_cache(self):
        run_mmlu_test(disable_radix_cache=True, enable_mixed_chunk=True)

    def test_no_chunked_prefill(self):
        run_mmlu_test(
            disable_radix_cache=False, enable_mixed_chunk=False, chunked_prefill_size=-1
        )

    def test_no_chunked_prefill_without_radix_cache(self):
        res = run_bench_serving(
            model=DEFAULT_MODEL_NAME_FOR_TEST,
            num_prompts=10,
            request_rate=float("inf"),
            other_server_args=["--disable-radix-cache", "--chunked-prefill-size", "-1"],
        )

        assert res["completed"] == 10

    def test_mixed_chunked_prefill_multi_requests(self):
        run_mulit_request_test(
            enable_mixed_chunk=True,
            chunked_prefill_size=2048,
        )


# Allow running this file directly as a script: unittest.main() collects and
# runs the test cases defined in this module.
if __name__ == "__main__":
    unittest.main()
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`"""`
			`python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_prefill_without_radix_cache`
			`"""`

Improve the structure of CI (#911) 2024-08-03 23:09:21 -07:00			`import unittest`

Improve end-to-end throughput test and its coverage (#1039) 2024-08-11 18:27:33 -07:00			`from sglang.test.test_utils import (`
			`DEFAULT_MODEL_NAME_FOR_TEST,`
Fix chunked prefill condition (#1594) 2024-10-07 14:34:14 +08:00			`run_bench_serving,`
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`run_mmlu_test,`
Fix mixed chunked prefill (#1850) 2024-10-30 21:20:41 -07:00			`run_mulit_request_test,`
Improve end-to-end throughput test and its coverage (#1039) 2024-08-11 18:27:33 -07:00			`)`
Improve the structure of CI (#911) 2024-08-03 23:09:21 -07:00

Add longer accuracy test on CI (#1049) 2024-08-12 02:21:38 -07:00			`class TestChunkedPrefill(unittest.TestCase):`
			`def test_chunked_prefill(self):`
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)`
Mixed style of chunked prefill (#1013) 2024-08-16 02:13:00 -07:00
			`def test_mixed_chunked_prefill(self):`
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=True)`
Add longer accuracy test on CI (#1049) 2024-08-12 02:21:38 -07:00
			`def test_chunked_prefill_without_radix_cache(self):`
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`run_mmlu_test(disable_radix_cache=True, enable_mixed_chunk=False)`
Mixed style of chunked prefill (#1013) 2024-08-16 02:13:00 -07:00
			`def test_mixed_chunked_prefill_without_radix_cache(self):`
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`run_mmlu_test(disable_radix_cache=True, enable_mixed_chunk=True)`
Improve the structure of CI (#911) 2024-08-03 23:09:21 -07:00
Fix oom issues with fp8 for llama (#1454) 2024-09-18 03:45:19 -07:00			`def test_no_chunked_prefill(self):`
Enhance the test case for chunked prefill (#1785) 2024-10-24 21:23:09 -07:00			`run_mmlu_test(`
Fix oom issues with fp8 for llama (#1454) 2024-09-18 03:45:19 -07:00			`disable_radix_cache=False, enable_mixed_chunk=False, chunked_prefill_size=-1`
			`)`

Fix chunked prefill condition (#1594) 2024-10-07 14:34:14 +08:00			`def test_no_chunked_prefill_without_radix_cache(self):`
			`res = run_bench_serving(`
			`model=DEFAULT_MODEL_NAME_FOR_TEST,`
			`num_prompts=10,`
			`request_rate=float("inf"),`
			`other_server_args=["--disable-radix-cache", "--chunked-prefill-size", "-1"],`
			`)`

			`assert res["completed"] == 10`

Fix mixed chunked prefill (#1850) 2024-10-30 21:20:41 -07:00			`def test_mixed_chunked_prefill_multi_requests(self):`
			`run_mulit_request_test(`
			`enable_mixed_chunk=True,`
			`chunked_prefill_size=2048,`
			`)`

Improve the structure of CI (#911) 2024-08-03 23:09:21 -07:00
			`if __name__ == "__main__":`
Clean up unit tests (#1020) 2024-08-10 15:09:03 -07:00			`unittest.main()`