2024-01-08 04:37:50 +00:00
|
|
|
"""Global configurations"""
|
|
|
|
|
|
2024-09-15 08:52:18 -07:00
|
|
|
import os
|
|
|
|
|
|
2024-01-08 04:37:50 +00:00
|
|
|
|
|
|
|
|
class GlobalConfig:
    """
    Store some global constants.

    See also python/sglang/srt/managers/schedule_batch.py::global_server_args_dict, which stores
    many global runtime arguments as well.

    Several values can be overridden through environment variables; those
    variables are read once, when the instance is constructed, and are
    converted to a numeric type so that the attribute type does not depend
    on whether the variable is set.
    """

    def __init__(self):
        # Verbosity level
        # 0: do not output anything
        # 2: output final text after every run
        self.verbosity = 0

        # Default backend of the language
        self.default_backend = None

        # Runtime constants: New generation token ratio estimation
        # Each value falls back to the literal default when the corresponding
        # environment variable is not set.
        self.default_init_new_token_ratio = float(
            os.environ.get("SGLANG_INIT_NEW_TOKEN_RATIO", 0.7)
        )
        self.default_min_new_token_ratio_factor = float(
            os.environ.get("SGLANG_MIN_NEW_TOKEN_RATIO_FACTOR", 0.14)
        )
        self.default_new_token_ratio_decay_steps = float(
            os.environ.get("SGLANG_NEW_TOKEN_RATIO_DECAY_STEPS", 600)
        )

        # Runtime constants: others
        self.retract_decode_steps = 20
        # os.environ.get returns a str when the variable is set, so convert
        # explicitly; otherwise this attribute would be an int by default but
        # a str when overridden. A non-integer value raises ValueError here.
        # NOTE(review): the unit is presumably bytes (default 384 * 1024 * 1024)
        # -- confirm against the code that consumes this value.
        self.flashinfer_workspace_size = int(
            os.environ.get("FLASHINFER_WORKSPACE_SIZE", 384 * 1024 * 1024)
        )

        # Output tokenization configs
        self.skip_special_tokens_in_output = True
        self.spaces_between_special_tokens_in_out = True

        # Language frontend interpreter optimization configs
        self.enable_precache_with_tracing = True
        self.enable_parallel_encoding = True
|
|
|
|
|
|
2024-07-13 23:39:37 -07:00
|
|
|
|
2024-01-08 04:37:50 +00:00
|
|
|
# Module-level GlobalConfig instance, constructed once when this module is imported.
global_config = GlobalConfig()
|