Organize server_args (#277)

This commit is contained in:
Liangsheng Yin
2024-03-11 20:06:52 +08:00
committed by GitHub
parent faba293a0d
commit 1b35547927
12 changed files with 92 additions and 34 deletions

View File

@@ -18,7 +18,6 @@ class ServerArgs:
max_prefill_num_token: Optional[int] = None
context_length: Optional[int] = None
tp_size: int = 1
model_mode: List[str] = ()
schedule_heuristic: str = "lpm"
schedule_conservativeness: float = 1.0
attention_reduce_in_fp32: bool = False
@@ -27,6 +26,10 @@ class ServerArgs:
disable_log_stats: bool = False
log_stats_interval: int = 10
log_level: str = "info"
# optional modes
disable_radix_cache: bool = False
enable_flashinfer: bool = False
disable_regex_jump_forward: bool = False
disable_disk_cache: bool = False
@@ -131,14 +134,6 @@ class ServerArgs:
default=ServerArgs.tp_size,
help="Tensor parallelism degree.",
)
parser.add_argument(
"--model-mode",
type=str,
default=[],
nargs="+",
choices=["flashinfer", "no-cache"],
help="Model mode: [flashinfer, no-cache]",
)
parser.add_argument(
"--schedule-heuristic",
type=str,
@@ -185,6 +180,17 @@ class ServerArgs:
default=ServerArgs.log_stats_interval,
help="Log stats interval in second.",
)
# optional modes
parser.add_argument(
"--disable-radix-cache",
action="store_true",
help="Disable RadixAttention",
)
parser.add_argument(
"--enable-flashinfer",
action="store_true",
help="Enable flashinfer inference kernels",
)
parser.add_argument(
"--disable-regex-jump-forward",
action="store_true",
@@ -204,6 +210,15 @@ class ServerArgs:
def url(self) -> str:
    """Return the HTTP base URL built from this object's ``host`` and ``port``."""
    return f"http://{self.host}:{self.port}"
def get_optional_modes_logging(self) -> str:
    """Return a one-line, comma-separated summary of the optional mode flags.

    The result lists ``disable_radix_cache``, ``enable_flashinfer``,
    ``disable_regex_jump_forward``, ``disable_disk_cache`` and
    ``attention_reduce_in_fp32`` as ``name=value`` pairs, in that order.
    """
    return (
        f"disable_radix_cache={self.disable_radix_cache}, "
        f"enable_flashinfer={self.enable_flashinfer}, "
        f"disable_regex_jump_forward={self.disable_regex_jump_forward}, "
        f"disable_disk_cache={self.disable_disk_cache}, "
        f"attention_reduce_in_fp32={self.attention_reduce_in_fp32}"
    )
@dataclasses.dataclass
class PortArgs: