make flashinfer workspace larger
This commit is contained in:
@@ -360,7 +360,7 @@ class ModelRunner:
 use_tensor_cores = False
 
 workspace_buffer = torch.empty(
-32 * 1024 * 1024, dtype=torch.int8, device="cuda"
+128 * 1024 * 1024, dtype=torch.int8, device="cuda"
 )
 self.flashinfer_prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
 workspace_buffer, "NHD"
Reference in New Issue
Block a user