make flashinfer workspace larger

This commit is contained in:
Lianmin Zheng
2024-06-21 17:32:36 -07:00
parent d2f8bfb2e1
commit e94e60d6fb

View File

@@ -360,7 +360,7 @@ class ModelRunner:
use_tensor_cores = False
workspace_buffer = torch.empty(
- 32 * 1024 * 1024, dtype=torch.int8, device="cuda"
+ 128 * 1024 * 1024, dtype=torch.int8, device="cuda"
)
self.flashinfer_prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
workspace_buffer, "NHD"