Organize sampling batch info better (#1562)

This commit is contained in:
Lianmin Zheng
2024-10-03 18:29:49 -07:00
committed by GitHub
parent e0b5dbcec1
commit 32eb6e96f2
8 changed files with 43 additions and 35 deletions

View File

@@ -411,8 +411,8 @@ class ModelRunner:
device = "cuda"
self.req_to_token_pool = ReqToTokenPool(
- max_num_reqs + 1,
- self.model_config.context_len + 4,
+ size=max_num_reqs + 1,
+ max_context_len=self.model_config.context_len + 4,
device=device,
)
if (