From e94e60d6fbb39d967638347c01a711cbe82e2c42 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Fri, 21 Jun 2024 17:32:36 -0700
Subject: [PATCH] Make flashinfer workspace buffer larger (32 MiB -> 128 MiB)

---
 python/sglang/srt/managers/controller/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/managers/controller/model_runner.py b/python/sglang/srt/managers/controller/model_runner.py
index ecca79976..942f29070 100644
--- a/python/sglang/srt/managers/controller/model_runner.py
+++ b/python/sglang/srt/managers/controller/model_runner.py
@@ -360,7 +360,7 @@ class ModelRunner:
                 use_tensor_cores = False
 
             workspace_buffer = torch.empty(
-                32 * 1024 * 1024, dtype=torch.int8, device="cuda"
+                128 * 1024 * 1024, dtype=torch.int8, device="cuda"
             )
             self.flashinfer_prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
                 workspace_buffer, "NHD"