Fix OOM issues with FP8 for Llama (#1454)

This commit is contained in:
Lianmin Zheng
2024-09-18 03:45:19 -07:00
committed by GitHub
parent aa2750beb3
commit 1acccb364a
8 changed files with 33 additions and 21 deletions

View File

@@ -12,8 +12,10 @@ from sglang.test.test_utils import (
class TestChunkedPrefill(unittest.TestCase):
def run_mmlu(self, disable_radix_cache, enable_mixed_chunk):
other_args = ["--chunked-prefill-size", "32"]
def run_mmlu(
self, disable_radix_cache, enable_mixed_chunk, chunked_prefill_size=32
):
other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
if disable_radix_cache:
other_args += ["--disable-radix-cache"]
@@ -55,6 +57,11 @@ class TestChunkedPrefill(unittest.TestCase):
def test_mixed_chunked_prefill_without_radix_cache(self):
    """Run the MMLU eval with mixed chunked prefill enabled and the radix cache disabled."""
    self.run_mmlu(enable_mixed_chunk=True, disable_radix_cache=True)
def test_no_chunked_prefill(self):
    """Run the MMLU eval with chunked prefill disabled entirely (size -1)."""
    self.run_mmlu(
        enable_mixed_chunk=False,
        disable_radix_cache=False,
        chunked_prefill_size=-1,
    )
# Allow running this test module directly, e.g. `python test_chunked_prefill.py`.
if __name__ == "__main__":
    unittest.main()