Use a single workspace for flashinfer (#1077)

This commit is contained in:
Lianmin Zheng
2024-08-14 19:25:37 -07:00
committed by GitHub
parent 6767e2229f
commit 326df4bab2
5 changed files with 16 additions and 18 deletions

View File

@@ -27,7 +27,7 @@ class GlobalConfig:
# Runtime constants: others
self.num_continue_decode_steps = 10
self.retract_decode_steps = 20
-self.flashinfer_workspace_size = 192 * 1024 * 1024
+self.flashinfer_workspace_size = 384 * 1024 * 1024
# Output tokenization configs
self.skip_special_tokens_in_output = True