Gemma Support (#256)

This commit is contained in:
Liangsheng Yin
2024-03-11 12:14:27 +08:00
committed by GitHub
parent 64fe311593
commit 89885b31ef
10 changed files with 428 additions and 55 deletions

View File

@@ -28,6 +28,7 @@ class ServerArgs:
log_level: str = "info"
disable_regex_jump_forward: bool = False
disable_disk_cache: bool = False
attention_reduce_in_fp32: bool = False
def __post_init__(self):
if self.tokenizer_path is None:
@@ -189,6 +190,11 @@ class ServerArgs:
action="store_true",
help="Disable disk cache to avoid possible crashes related to file system or high concurrency.",
)
# Opt-in switch: argparse stores it as `attention_reduce_in_fp32`, which is
# False unless the flag is passed on the command line.
parser.add_argument(
    "--attention-reduce-in-fp32",
    action="store_true",
    help="Cast the intermediate attention results to fp32 to avoid possible crashes related to fp16.",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):