Simplify the event loop and expose --num-continuous-decode-steps as an argument (#1652)

This commit is contained in:
Lianmin Zheng
2024-10-12 21:35:30 -07:00
committed by GitHub
parent 9610fcd469
commit 7ee6c259ff
5 changed files with 85 additions and 62 deletions

View File

@@ -19,7 +19,6 @@ class GlobalConfig:
self.new_token_ratio_decay = 0.001
# Runtime constants: others
self.num_continue_decode_steps = 10
self.retract_decode_steps = 20
self.flashinfer_workspace_size = os.environ.get(
"FLASHINFER_WORKSPACE_SIZE", 384 * 1024 * 1024