Revert "Fix memory leak when doing chunked prefill" (#1797)
This commit is contained in:
@@ -15,7 +15,7 @@ class GlobalConfig:
|
||||
|
||||
# Runtime constants: New generation token ratio estimation
|
||||
self.init_new_token_ratio = 0.7
|
||||
self.min_new_token_ratio = 0.1
|
||||
self.base_min_new_token_ratio = 0.1
|
||||
self.new_token_ratio_decay = 0.001
|
||||
|
||||
# Runtime constants: others
|
||||
@@ -32,15 +32,5 @@ class GlobalConfig:
|
||||
self.enable_precache_with_tracing = True
|
||||
self.enable_parallel_encoding = True
|
||||
|
||||
def adjust_new_token_ratio(self, schedule_conservativeness=1):
    """Return the new-token ratios scaled by ``schedule_conservativeness``.

    The configured ``self.min_new_token_ratio`` is multiplied by
    ``schedule_conservativeness`` and capped at ``1.0``. The initial ratio
    is ``self.init_new_token_ratio``, raised if necessary so that it is
    never below the scaled minimum. ``self`` is only read, never modified.

    Args:
        schedule_conservativeness: Non-negative multiplier applied to the
            configured minimum ratio. Defaults to ``1`` (no scaling).

    Returns:
        A ``(min_new_token_ratio, init_new_token_ratio)`` tuple.

    Raises:
        ValueError: If ``schedule_conservativeness`` is not ``>= 0``
            (this includes NaN).
    """
    # Explicit raise instead of ``assert`` so the check still runs under
    # ``python -O``. ``not (x >= 0)`` rather than ``x < 0`` so NaN is
    # rejected as well.
    if not schedule_conservativeness >= 0:
        raise ValueError("Invalid schedule_conservativeness")

    min_new_token_ratio = min(
        self.min_new_token_ratio * schedule_conservativeness,
        1.0,
    )
    # The starting ratio must never be lower than the floor computed above.
    init_new_token_ratio = max(self.init_new_token_ratio, min_new_token_ratio)

    return min_new_token_ratio, init_new_token_ratio
|
||||
|
||||
|
||||
# Module-level GlobalConfig instance, created once when this module is loaded.
global_config = GlobalConfig()
|
||||
|
||||
Reference in New Issue
Block a user