Clean up the usage of flashinfer (#610)

This commit is contained in:
Lianmin Zheng
2024-07-12 13:00:03 -07:00
committed by GitHub
parent 519e20cfda
commit af4e7910e7
5 changed files with 46 additions and 75 deletions

View File

@@ -53,6 +53,7 @@ class ServerArgs:
disable_flashinfer: bool = False
disable_radix_cache: bool = False
disable_regex_jump_forward: bool = False
disable_cuda_graph: bool = False
disable_disk_cache: bool = False
attention_reduce_in_fp32: bool = False
enable_p2p_check: bool = False
@@ -294,6 +295,11 @@ class ServerArgs:
action="store_true",
help="Disable regex jump-forward",
)
parser.add_argument(
"--disable-cuda-graph",
action="store_true",
help="Disable cuda graph.",
)
parser.add_argument(
"--disable-disk-cache",
action="store_true",