Enable chunked prefill by default (#1040)

This commit is contained in:
Lianmin Zheng
2024-08-14 21:56:20 -07:00
committed by GitHub
parent 8d2d876fc8
commit e86b1ccbf0
4 changed files with 10 additions and 10 deletions

View File

@@ -49,7 +49,7 @@ class ServerArgs:
max_running_requests: Optional[int] = None
max_num_reqs: Optional[int] = None
max_total_tokens: Optional[int] = None
-chunked_prefill_size: int = -1
+chunked_prefill_size: int = 8192
max_prefill_tokens: int = 16384
schedule_policy: str = "lpm"
schedule_conservativeness: float = 1.0