Adjust default mem fraction to avoid OOM (#823)

This commit is contained in:
Ying Sheng
2024-07-30 01:58:31 -07:00
committed by GitHub
parent ae5c0fc442
commit e7487b08bc
4 changed files with 22 additions and 17 deletions

View File

@@ -212,9 +212,14 @@ class ModelRunner:
)
 if max_num_reqs is None:
-    max_num_reqs = max(
-        int(self.max_total_num_tokens / self.model_config.context_len * 512),
-        2048,
+    max_num_reqs = min(
+        max(
+            int(
+                self.max_total_num_tokens / self.model_config.context_len * 512
+            ),
+            2048,
+        ),
+        5120,
     )
self.req_to_token_pool = ReqToTokenPool(