Enable chunked prefill by default (#1040)

This commit is contained in:
Lianmin Zheng
2024-08-14 21:56:20 -07:00
committed by GitHub
parent 8d2d876fc8
commit e86b1ccbf0
4 changed files with 10 additions and 10 deletions

View File

@@ -86,11 +86,11 @@ class TestServingThroughput(unittest.TestCase):
# A100 (PCIE) performance
assert res["output_throughput"] > 930
-def test_default_with_chunked_prefill(self):
+def test_default_without_chunked_prefill(self):
res = self.run_test(
disable_radix_cache=ServerArgs.disable_radix_cache,
disable_flashinfer=ServerArgs.disable_flashinfer,
-chunked_prefill_size=8192,
+chunked_prefill_size=-1,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":

View File

@@ -71,7 +71,7 @@ class TestServingThroughput(unittest.TestCase):
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
-assert res["output_throughput"] >= 1400
+assert res["output_throughput"] > 1400
def test_default_without_radix_cache(self):
res = self.run_test(
@@ -82,18 +82,18 @@ class TestServingThroughput(unittest.TestCase):
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
-assert res["output_throughput"] >= 1450
+assert res["output_throughput"] > 1450
-def test_default_with_chunked_prefill(self):
+def test_default_without_chunked_prefill(self):
res = self.run_test(
disable_radix_cache=ServerArgs.disable_radix_cache,
disable_flashinfer=ServerArgs.disable_flashinfer,
-chunked_prefill_size=8192,
+chunked_prefill_size=-1,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
-assert res["output_throughput"] >= 1400
+assert res["output_throughput"] > 1400
def test_all_cases(self):
for disable_radix_cache in [False, True]: