Make req_pool_indices on CPU (#960)

This commit is contained in:
Liangsheng Yin
2024-08-07 01:41:25 -07:00
committed by GitHub
parent 05abd1261c
commit 7fa54a1ab3
4 changed files with 110 additions and 114 deletions

View File

@@ -19,7 +19,6 @@ class GlobalConfig:
self.init_new_token_ratio = 0.7
self.base_min_new_token_ratio = 0.1
self.new_token_ratio_decay = 0.001
self.new_token_ratio_recovery = 0.05
# Runtime constants: The threshold (number of tokens) to trigger layer-wise cuda sync.
# This can improve the speed for large batch sizes during prefill.