Move args from global_config to environ (#11332)
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
"""Global configurations"""
|
||||
|
||||
import os
|
||||
# FIXME: deprecate this file and move all usage to sglang.srt.environ or sglang.__init__.py
|
||||
|
||||
|
||||
class GlobalConfig:
|
||||
@@ -20,27 +20,6 @@ class GlobalConfig:
|
||||
# Default backend of the language
|
||||
self.default_backend = None
|
||||
|
||||
# Runtime constants: New generation token ratio estimation
|
||||
self.default_init_new_token_ratio = float(
|
||||
os.environ.get("SGLANG_INIT_NEW_TOKEN_RATIO", 0.7)
|
||||
)
|
||||
self.default_min_new_token_ratio_factor = float(
|
||||
os.environ.get("SGLANG_MIN_NEW_TOKEN_RATIO_FACTOR", 0.14)
|
||||
)
|
||||
self.default_new_token_ratio_decay_steps = float(
|
||||
os.environ.get("SGLANG_NEW_TOKEN_RATIO_DECAY_STEPS", 600)
|
||||
)
|
||||
self.torch_empty_cache_interval = float(
|
||||
os.environ.get(
|
||||
"SGLANG_EMPTY_CACHE_INTERVAL", -1
|
||||
) # in seconds. Set if you observe high memory accumulation over a long serving period.
|
||||
)
|
||||
# Runtime constants: others
|
||||
self.retract_decode_steps = 20
|
||||
self.flashinfer_workspace_size = int(
|
||||
os.environ.get("FLASHINFER_WORKSPACE_SIZE", 384 * 1024 * 1024)
|
||||
)
|
||||
|
||||
# Output tokenization configs
|
||||
self.skip_special_tokens_in_output = True
|
||||
self.spaces_between_special_tokens_in_out = True
|
||||
|
||||
Reference in New Issue
Block a user