Mixed style of chunked prefill (#1013)
This commit is contained in:
@@ -80,6 +80,7 @@ class ServerArgs:
     disable_regex_jump_forward: bool = False
     disable_cuda_graph: bool = False
     disable_disk_cache: bool = False
+    enable_mixed_chunk: bool = False
     enable_torch_compile: bool = False
     enable_p2p_check: bool = False
     enable_mla: bool = False
@@ -396,6 +397,11 @@ class ServerArgs:
             action="store_true",
             help="Disable disk cache to avoid possible crashes related to file system or high concurrency.",
         )
+        parser.add_argument(
+            "--enable-mixed-chunk",
+            action="store_true",
+            help="Enabling mixing prefill and decode in a chunked batch.",
+        )
         parser.add_argument(
             "--enable-torch-compile",
             action="store_true",
Reference in New Issue
Block a user