Start new_token_ratio at min_new_token_ratio and change the default schedule_conservativeness from 0.8 to 1.0 (#701)
This commit is contained in:
@@ -161,15 +161,12 @@ class ModelTpServer:
|
|||||||
assert (
|
assert (
|
||||||
server_args.schedule_conservativeness >= 0
|
server_args.schedule_conservativeness >= 0
|
||||||
), "Invalid schedule_conservativeness"
|
), "Invalid schedule_conservativeness"
|
||||||
self.new_token_ratio = min(
|
|
||||||
global_config.base_new_token_ratio * server_args.schedule_conservativeness,
|
|
||||||
1.0,
|
|
||||||
)
|
|
||||||
self.min_new_token_ratio = min(
|
self.min_new_token_ratio = min(
|
||||||
global_config.base_min_new_token_ratio
|
global_config.base_min_new_token_ratio
|
||||||
* server_args.schedule_conservativeness,
|
* server_args.schedule_conservativeness,
|
||||||
1.0,
|
1.0,
|
||||||
)
|
)
|
||||||
|
self.new_token_ratio = self.min_new_token_ratio
|
||||||
self.new_token_ratio_decay = global_config.new_token_ratio_decay
|
self.new_token_ratio_decay = global_config.new_token_ratio_decay
|
||||||
self.new_token_ratio_recovery = global_config.new_token_ratio_recovery
|
self.new_token_ratio_recovery = global_config.new_token_ratio_recovery
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ class ServerArgs:
|
|||||||
max_prefill_tokens: Optional[int] = None
|
max_prefill_tokens: Optional[int] = None
|
||||||
max_running_requests: Optional[int] = None
|
max_running_requests: Optional[int] = None
|
||||||
schedule_heuristic: str = "lpm"
|
schedule_heuristic: str = "lpm"
|
||||||
schedule_conservativeness: float = 0.8
|
schedule_conservativeness: float = 1.0
|
||||||
|
|
||||||
# Other runtime options
|
# Other runtime options
|
||||||
tp_size: int = 1
|
tp_size: int = 1
|
||||||
|
|||||||
Reference in New Issue
Block a user