Organize sampling batch info better (#1562)

2024-10-03 18:29:49 -07:00
parent e0b5dbcec1
commit 32eb6e96f2
8 changed files with 43 additions and 35 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -411,8 +411,8 @@ class ModelRunner:

        device = "cuda"
        self.req_to_token_pool = ReqToTokenPool(
-            max_num_reqs + 1,
-            self.model_config.context_len + 4,
+            size=max_num_reqs + 1,
+            max_context_len=self.model_config.context_len + 4,
            device=device,
        )
        if (