Improve the user control of new_token_ratio (#1811)
This commit is contained in:
@@ -14,9 +14,15 @@ class GlobalConfig:
|
||||
self.default_backend = None
|
||||
|
||||
# Runtime constants: New generation token ratio estimation
|
||||
self.init_new_token_ratio = 0.7
|
||||
self.base_min_new_token_ratio = 0.1
|
||||
self.new_token_ratio_decay = 0.001
|
||||
self.default_init_new_token_ratio = float(
|
||||
os.environ.get("SGLANG_INIT_NEW_TOKEN_RATIO", 0.7)
|
||||
)
|
||||
self.default_min_new_token_ratio_factor = float(
|
||||
os.environ.get("SGLANG_MIN_NEW_TOKEN_RATIO_FACTOR", 0.14)
|
||||
)
|
||||
self.default_new_token_ratio_decay_steps = float(
|
||||
os.environ.get("SGLANG_NEW_TOKEN_RATIO_DECAY_STEPS", 600)
|
||||
)
|
||||
|
||||
# Runtime constants: others
|
||||
self.retract_decode_steps = 20
|
||||
|
||||
Reference in New Issue
Block a user