Add max_prefill_num_token into server arguments (#133)
This commit is contained in:
@@ -430,7 +430,8 @@ class Runtime:
|
||||
load_format: str = "auto",
|
||||
tokenizer_mode: str = "auto",
|
||||
trust_remote_code: bool = True,
|
||||
- mem_fraction_static: float = 0.9,
|
||||
+ mem_fraction_static: float = ServerArgs.mem_fraction_static,
|
||||
+ max_prefill_num_token: int = ServerArgs.max_prefill_num_token,
|
||||
tp_size: int = 1,
|
||||
model_mode: List[str] = (),
|
||||
schedule_heuristic: str = "lpm",
|
||||
@@ -451,6 +452,7 @@ class Runtime:
|
||||
tokenizer_mode=tokenizer_mode,
|
||||
trust_remote_code=trust_remote_code,
|
||||
mem_fraction_static=mem_fraction_static,
|
||||
+ max_prefill_num_token=max_prefill_num_token,
|
||||
tp_size=tp_size,
|
||||
model_mode=model_mode,
|
||||
schedule_heuristic=schedule_heuristic,
|
||||
|
||||
Reference in New Issue
Block a user