Fix memory leak when doing chunked prefill (#1787)

This commit is contained in:
Liangsheng Yin
2024-10-25 08:01:17 -07:00
committed by GitHub
parent 2148914e1b
commit a2f5e7555f
7 changed files with 184 additions and 69 deletions

View File

@@ -660,6 +660,7 @@ def run_mmlu_test(
chunked_prefill_size=32,
):
other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
other_args += ["--mem-fraction-static", "0.85"]
if disable_radix_cache:
other_args += ["--disable-radix-cache"]
if enable_mixed_chunk: