Revert "Fix memory leak when doing chunked prefill" (#1797)

This commit is contained in:
Lianmin Zheng
2024-10-25 10:24:44 -07:00
committed by GitHub
parent 40900baea7
commit c555ce2ca2
6 changed files with 69 additions and 183 deletions

View File

@@ -15,7 +15,7 @@ class GlobalConfig:
# Runtime constants: New generation token ratio estimation
self.init_new_token_ratio = 0.7
self.min_new_token_ratio = 0.1
self.base_min_new_token_ratio = 0.1
self.new_token_ratio_decay = 0.001
# Runtime constants: others
@@ -32,15 +32,5 @@ class GlobalConfig:
self.enable_precache_with_tracing = True
self.enable_parallel_encoding = True
def adjust_new_token_ratio(self, schedule_conservativeness=1):
    """Scale the new-token ratio bounds by a conservativeness factor.

    A larger ``schedule_conservativeness`` raises the minimum ratio
    (capped at 1.0); the initial ratio is then lifted so it is never
    below the minimum.

    Returns:
        A ``(min_new_token_ratio, init_new_token_ratio)`` pair.
    """
    assert schedule_conservativeness >= 0, "Invalid schedule_conservativeness"
    # Scale the floor, but never let it exceed 1.0.
    scaled_floor = self.min_new_token_ratio * schedule_conservativeness
    floor = scaled_floor if scaled_floor < 1.0 else 1.0
    # The starting ratio must be at least the floor.
    start = self.init_new_token_ratio if self.init_new_token_ratio > floor else floor
    return floor, start
global_config = GlobalConfig()