Use min new token ratio at start (#701)

This commit is contained in:
Liangsheng Yin
2024-07-23 11:52:50 -07:00
committed by GitHub
parent 824a77d04d
commit 268684439b
2 changed files with 2 additions and 5 deletions

View File

@@ -29,7 +29,7 @@ class ServerArgs:
max_prefill_tokens: Optional[int] = None
max_running_requests: Optional[int] = None
schedule_heuristic: str = "lpm"
-schedule_conservativeness: float = 0.8
+schedule_conservativeness: float = 1.0
# Other runtime options
tp_size: int = 1