Support gpt-bigcode model class (#681)

This commit is contained in:
Liangsheng Yin
2024-07-20 18:34:37 -07:00
committed by GitHub
parent 69d19188fc
commit caaad53b52
6 changed files with 341 additions and 12 deletions

View File

@@ -55,6 +55,7 @@ class ServerArgs:
disable_regex_jump_forward: bool = False
disable_cuda_graph: bool = False
disable_disk_cache: bool = False
enable_torch_compile: bool = False
attention_reduce_in_fp32: bool = False
enable_p2p_check: bool = False
efficient_weight_load: bool = False
@@ -317,6 +318,11 @@ class ServerArgs:
action="store_true",
help="Disable disk cache to avoid possible crashes related to file system or high concurrency.",
)
parser.add_argument(
"--enable-torch-compile",
action="store_true",
help="Optimize the model with torch.compile, experimental feature.",
)
parser.add_argument(
"--attention-reduce-in-fp32",
action="store_true",