Mixed style of chunked prefill (#1013)

This commit is contained in:
Liangsheng Yin
2024-08-16 02:13:00 -07:00
committed by GitHub
parent 5a261bd055
commit 3694f8f996
14 changed files with 195 additions and 59 deletions

View File

@@ -80,6 +80,7 @@ class ServerArgs:
disable_regex_jump_forward: bool = False
disable_cuda_graph: bool = False
disable_disk_cache: bool = False
enable_mixed_chunk: bool = False
enable_torch_compile: bool = False
enable_p2p_check: bool = False
enable_mla: bool = False
@@ -396,6 +397,11 @@ class ServerArgs:
action="store_true",
help="Disable disk cache to avoid possible crashes related to file system or high concurrency.",
)
parser.add_argument(
"--enable-mixed-chunk",
action="store_true",
help="Enabling mixing prefill and decode in a chunked batch.",
)
parser.add_argument(
"--enable-torch-compile",
action="store_true",