Fuse top_k and top_p in the sampler (#1457)
This commit is contained in:
@@ -400,8 +400,8 @@ class ModelRunner:
|
||||
)
|
||||
|
||||
self.req_to_token_pool = ReqToTokenPool(
|
||||
max_num_reqs,
|
||||
self.model_config.context_len + 8,
|
||||
max_num_reqs + 1,
|
||||
self.model_config.context_len + 4,
|
||||
)
|
||||
if (
|
||||
self.model_config.attention_arch == AttentionArch.MLA
|
||||
|
||||
Reference in New Issue
Block a user