Organize server_args (#277)

2024-03-11 20:06:52 +08:00
parent faba293a0d
commit 1b35547927
12 changed files with 92 additions and 34 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -18,7 +18,6 @@ class ServerArgs:
    max_prefill_num_token: Optional[int] = None
    context_length: Optional[int] = None
    tp_size: int = 1
-    model_mode: List[str] = ()
    schedule_heuristic: str = "lpm"
    schedule_conservativeness: float = 1.0
    attention_reduce_in_fp32: bool = False
@@ -27,6 +26,10 @@ class ServerArgs:
    disable_log_stats: bool = False
    log_stats_interval: int = 10
    log_level: str = "info"
+
+    # optional modes
+    disable_radix_cache: bool = False
+    enable_flashinfer: bool = False
    disable_regex_jump_forward: bool = False
    disable_disk_cache: bool = False

@@ -131,14 +134,6 @@ class ServerArgs:
            default=ServerArgs.tp_size,
            help="Tensor parallelism degree.",
        )
-        parser.add_argument(
-            "--model-mode",
-            type=str,
-            default=[],
-            nargs="+",
-            choices=["flashinfer", "no-cache"],
-            help="Model mode: [flashinfer, no-cache]",
-        )
        parser.add_argument(
            "--schedule-heuristic",
            type=str,
@@ -185,6 +180,17 @@ class ServerArgs:
            default=ServerArgs.log_stats_interval,
            help="Log stats interval in second.",
        )
+        # optional modes
+        parser.add_argument(
+            "--disable-radix-cache",
+            action="store_true",
+            help="Disable RadixAttention",
+        )
+        parser.add_argument(
+            "--enable-flashinfer",
+            action="store_true",
+            help="Enable flashinfer inference kernels",
+        )
        parser.add_argument(
            "--disable-regex-jump-forward",
            action="store_true",
@@ -204,6 +210,15 @@ class ServerArgs:
    def url(self):
        return f"http://{self.host}:{self.port}"

+    def get_optional_modes_logging(self):
+        return (
+            f"disable_radix_cache={self.disable_radix_cache}, "
+            f"enable_flashinfer={self.enable_flashinfer}, "
+            f"disable_regex_jump_forward={self.disable_regex_jump_forward}, "
+            f"disable_disk_cache={self.disable_disk_cache}, "
+            f"attention_reduce_in_fp32={self.attention_reduce_in_fp32}"
+        )
+

@dataclasses.dataclass
 class PortArgs: