Add sanity check for max_running_requests (#5016)

This commit is contained in:
fzyzcjy
2025-04-21 08:56:49 +08:00
committed by GitHub
parent b868526d94
commit 5fc4b6004e

View File

@@ -116,6 +116,7 @@ class TpModelWorker:
),
self.model_runner.req_to_token_pool.size,
)
assert self.max_running_requests > 0, "max_running_request is zero"
self.max_req_len = min(
self.model_config.context_len - 1,
self.max_total_num_tokens - 1,