Adjust default mem fraction to avoid OOM (#823)

2024-07-30 01:58:31 -07:00
parent ae5c0fc442
commit e7487b08bc
4 changed files with 22 additions and 17 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -212,9 +212,14 @@ class ModelRunner:
            )

        if max_num_reqs is None:
-            max_num_reqs = max(
-                int(self.max_total_num_tokens / self.model_config.context_len * 512),
-                2048,
+            max_num_reqs = min(
+                max(
+                    int(
+                        self.max_total_num_tokens / self.model_config.context_len * 512
+                    ),
+                    2048,
+                ),
+                5120,
            )

        self.req_to_token_pool = ReqToTokenPool(