Organize server_args (#277)
This commit is contained in:
@@ -18,7 +18,6 @@ class ServerArgs:
|
||||
max_prefill_num_token: Optional[int] = None
|
||||
context_length: Optional[int] = None
|
||||
tp_size: int = 1
|
||||
model_mode: List[str] = ()
|
||||
schedule_heuristic: str = "lpm"
|
||||
schedule_conservativeness: float = 1.0
|
||||
attention_reduce_in_fp32: bool = False
|
||||
@@ -27,6 +26,10 @@ class ServerArgs:
|
||||
disable_log_stats: bool = False
|
||||
log_stats_interval: int = 10
|
||||
log_level: str = "info"
|
||||
|
||||
# optional modes
|
||||
disable_radix_cache: bool = False
|
||||
enable_flashinfer: bool = False
|
||||
disable_regex_jump_forward: bool = False
|
||||
disable_disk_cache: bool = False
|
||||
|
||||
@@ -131,14 +134,6 @@ class ServerArgs:
|
||||
default=ServerArgs.tp_size,
|
||||
help="Tensor parallelism degree.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-mode",
|
||||
type=str,
|
||||
default=[],
|
||||
nargs="+",
|
||||
choices=["flashinfer", "no-cache"],
|
||||
help="Model mode: [flashinfer, no-cache]",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--schedule-heuristic",
|
||||
type=str,
|
||||
@@ -185,6 +180,17 @@ class ServerArgs:
|
||||
default=ServerArgs.log_stats_interval,
|
||||
help="Log stats interval in second.",
|
||||
)
|
||||
# optional modes
|
||||
parser.add_argument(
|
||||
"--disable-radix-cache",
|
||||
action="store_true",
|
||||
help="Disable RadixAttention",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-flashinfer",
|
||||
action="store_true",
|
||||
help="Enable flashinfer inference kernels",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-regex-jump-forward",
|
||||
action="store_true",
|
||||
@@ -204,6 +210,15 @@ class ServerArgs:
|
||||
def url(self):
    """Return the HTTP base URL built from this instance's host and port."""
    host, port = self.host, self.port
    return f"http://{host}:{port}"
|
||||
|
||||
def get_optional_modes_logging(self):
    """Render the optional-mode switches as one ``name=value`` line.

    Returns:
        A string of ``name=value`` pairs separated by ``", "``, covering
        (in this order) ``disable_radix_cache``, ``enable_flashinfer``,
        ``disable_regex_jump_forward``, ``disable_disk_cache`` and
        ``attention_reduce_in_fp32`` as currently set on this instance.
    """
    # Order matters: it defines the order of the pairs in the output.
    flag_names = (
        "disable_radix_cache",
        "enable_flashinfer",
        "disable_regex_jump_forward",
        "disable_disk_cache",
        "attention_reduce_in_fp32",
    )
    return ", ".join(f"{flag}={getattr(self, flag)}" for flag in flag_names)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PortArgs:
|
||||
|
||||
Reference in New Issue
Block a user