Add sanity check for max_running_requests (#5016)
This commit is contained in:
@@ -116,6 +116,7 @@ class TpModelWorker:
 ),
 self.model_runner.req_to_token_pool.size,
 )
+assert self.max_running_requests > 0, "max_running_request is zero"
 self.max_req_len = min(
 self.model_config.context_len - 1,
 self.max_total_num_tokens - 1,
Reference in New Issue
Block a user