Add max_prefill_num_token to the server arguments (#133)

2024-02-03 02:35:54 -08:00
parent 67be11c790
commit e095b16236
3 changed files with 12 additions and 2 deletions
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -430,7 +430,8 @@ class Runtime:
        load_format: str = "auto",
        tokenizer_mode: str = "auto",
        trust_remote_code: bool = True,
-        mem_fraction_static: float = 0.9,
+        mem_fraction_static: float = ServerArgs.mem_fraction_static,
+        max_prefill_num_token: int = ServerArgs.max_prefill_num_token,
        tp_size: int = 1,
        model_mode: List[str] = (),
        schedule_heuristic: str = "lpm",
@@ -451,6 +452,7 @@ class Runtime:
            tokenizer_mode=tokenizer_mode,
            trust_remote_code=trust_remote_code,
            mem_fraction_static=mem_fraction_static,
+            max_prefill_num_token=max_prefill_num_token,
            tp_size=tp_size,
            model_mode=model_mode,
            schedule_heuristic=schedule_heuristic,