[Env] minimal version for organizing envs (#10479)
This commit is contained in:
275
python/sglang/environ.py
Normal file
275
python/sglang/environ.py
Normal file
@@ -0,0 +1,275 @@
|
||||
import os
|
||||
import subprocess
|
||||
import warnings
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from typing import Any
|
||||
|
||||
|
||||
class EnvField:
|
||||
def __init__(self, default: Any):
|
||||
self.default = default
|
||||
# NOTE: we use None to indicate whether the value is set or not
|
||||
# If the value is manually set to None, we need mark it as _set_to_none.
|
||||
# Always use clear() to reset the value, which leads to the default fallback.
|
||||
self._set_to_none = False
|
||||
|
||||
def __set_name__(self, owner, name):
|
||||
self.name = name
|
||||
|
||||
def parse(self, value: str) -> Any:
|
||||
raise NotImplementedError()
|
||||
|
||||
def get(self) -> Any:
|
||||
value = os.getenv(self.name)
|
||||
if self._set_to_none:
|
||||
assert value is None
|
||||
return None
|
||||
|
||||
if value is None:
|
||||
return self.default
|
||||
|
||||
try:
|
||||
return self.parse(value)
|
||||
except ValueError as e:
|
||||
warnings.warn(
|
||||
f'Invalid value for {self.name}: {e}, using default "{self.default}"'
|
||||
)
|
||||
return self.default
|
||||
|
||||
def is_set(self):
|
||||
# NOTE: If None is manually set, it is considered as set.
|
||||
return self.name in os.environ or self._set_to_none
|
||||
|
||||
def get_set_value_or(self, or_value: Any):
|
||||
# NOTE: Ugly usage, but only way to get custom default value.
|
||||
return self.get() if self.is_set() else or_value
|
||||
|
||||
def set(self, value: Any):
|
||||
if value is None:
|
||||
self._set_to_none = True
|
||||
os.environ.pop(self.name, None)
|
||||
else:
|
||||
self._set_to_none = False
|
||||
os.environ[self.name] = str(value)
|
||||
|
||||
@contextmanager
|
||||
def override(self, value: Any):
|
||||
backup_present = self.name in os.environ
|
||||
backup_value = os.environ.get(self.name)
|
||||
backup_set_to_none = self._set_to_none
|
||||
self.set(value)
|
||||
yield
|
||||
if backup_present:
|
||||
os.environ[self.name] = backup_value
|
||||
else:
|
||||
os.environ.pop(self.name, None)
|
||||
self._set_to_none = backup_set_to_none
|
||||
|
||||
def clear(self):
|
||||
os.environ.pop(self.name, None)
|
||||
self._set_to_none = False
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
return self.get()
|
||||
|
||||
|
||||
class EnvStr(EnvField):
|
||||
def parse(self, value: str) -> str:
|
||||
return value
|
||||
|
||||
|
||||
class EnvBool(EnvField):
|
||||
def parse(self, value: str) -> bool:
|
||||
value = value.lower()
|
||||
if value in ["true", "1", "yes", "y"]:
|
||||
return True
|
||||
if value in ["false", "0", "no", "n"]:
|
||||
return False
|
||||
raise ValueError(f'"{value}" is not a valid boolean value')
|
||||
|
||||
|
||||
class EnvInt(EnvField):
|
||||
def parse(self, value: str) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
raise ValueError(f'"{value}" is not a valid integer value')
|
||||
|
||||
|
||||
class EnvFloat(EnvField):
|
||||
def parse(self, value: str) -> float:
|
||||
try:
|
||||
return float(value)
|
||||
except ValueError:
|
||||
raise ValueError(f'"{value}" is not a valid float value')
|
||||
|
||||
|
||||
class Envs:
|
||||
# fmt: off
|
||||
|
||||
# Model & File Download
|
||||
SGLANG_USE_MODELSCOPE = EnvBool(False)
|
||||
|
||||
# Test & Debug
|
||||
SGLANG_IS_IN_CI = EnvBool(False)
|
||||
SGLANG_AMD_CI = EnvBool(False)
|
||||
SGLANG_TEST_RETRACT = EnvBool(False)
|
||||
SGLANG_SET_CPU_AFFINITY = EnvBool(False)
|
||||
SGLANG_PROFILE_WITH_STACK = EnvBool(True)
|
||||
SGLANG_RECORD_STEP_TIME = EnvBool(False)
|
||||
SGLANG_GC_LOG = EnvBool(False)
|
||||
SGLANG_FORCE_SHUTDOWN = EnvBool(False)
|
||||
SGLANG_DEBUG_MEMORY_POOL = EnvBool(False)
|
||||
SGLANG_TEST_REQUEST_TIME_STATS = EnvBool(False)
|
||||
SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK = EnvBool(False)
|
||||
SGLANG_DISABLE_REQUEST_LOGGING = EnvBool(False)
|
||||
|
||||
# Model Parallel
|
||||
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER = EnvBool(True)
|
||||
|
||||
# Constrained Decoding
|
||||
SGLANG_DISABLE_OUTLINES_DISK_CACHE = EnvBool(True)
|
||||
SGLANG_GRAMMAR_TIMEOUT = EnvFloat(300)
|
||||
|
||||
# Hi-Cache
|
||||
SGLANG_HICACHE_HF3FS_CONFIG_PATH = EnvStr(None)
|
||||
|
||||
# Mooncake KV Transfer
|
||||
SGLANG_MOONCAKE_CUSTOM_MEM_POOL = EnvBool(False)
|
||||
ENABLE_ASCEND_TRANSFER_WITH_MOONCAKE = EnvBool(False)
|
||||
|
||||
# AMD & ROCm
|
||||
SGLANG_USE_AITER = EnvBool(False)
|
||||
SGLANG_ROCM_FUSED_DECODE_MLA = EnvBool(False)
|
||||
|
||||
# Quantization
|
||||
SGLANG_INT4_WEIGHT = EnvBool(False)
|
||||
SGLANG_CPU_QUANTIZATION = EnvBool(False)
|
||||
SGLANG_USE_DYNAMIC_MXFP4_LINEAR = EnvBool(False)
|
||||
SGLANG_FORCE_FP8_MARLIN = EnvBool(False)
|
||||
|
||||
# Flashinfer
|
||||
SGLANG_IS_FLASHINFER_AVAILABLE = EnvBool(True)
|
||||
SGLANG_ENABLE_FLASHINFER_GEMM = EnvBool(False)
|
||||
|
||||
# Triton
|
||||
SGLANG_TRITON_DECODE_ATTN_STATIC_KV_SPLITS = EnvBool(False)
|
||||
|
||||
# Torch Compile
|
||||
SGLANG_ENABLE_TORCH_COMPILE = EnvBool(False)
|
||||
|
||||
# EPLB
|
||||
SGLANG_EXPERT_LOCATION_UPDATER_LOG_INPUT = EnvBool(False)
|
||||
SGLANG_EXPERT_LOCATION_UPDATER_CANARY = EnvBool(False)
|
||||
SGLANG_EXPERT_LOCATION_UPDATER_LOG_METRICS = EnvBool(False)
|
||||
SGLANG_LOG_EXPERT_LOCATION_METADATA = EnvBool(False)
|
||||
|
||||
# TBO
|
||||
SGLANG_TBO_DEBUG = EnvBool(False)
|
||||
|
||||
# DeepGemm
|
||||
SGLANG_ENABLE_JIT_DEEPGEMM = EnvBool(True)
|
||||
SGLANG_JIT_DEEPGEMM_PRECOMPILE = EnvBool(True)
|
||||
SGLANG_JIT_DEEPGEMM_COMPILE_WORKERS = EnvInt(4)
|
||||
SGLANG_IN_DEEPGEMM_PRECOMPILE_STAGE = EnvBool(False)
|
||||
SGLANG_DG_CACHE_DIR = EnvStr(os.path.expanduser("~/.cache/deep_gemm"))
|
||||
SGLANG_DG_USE_NVRTC = EnvBool(False)
|
||||
SGLANG_USE_DEEPGEMM_BMM = EnvBool(False)
|
||||
|
||||
# sgl-kernel
|
||||
SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK = EnvBool(False)
|
||||
|
||||
# vLLM dependencies
|
||||
USE_VLLM_CUSTOM_ALLREDUCE = EnvBool(False)
|
||||
USE_VLLM_CUTLASS_W8A8_FP8_KERNEL = EnvBool(False)
|
||||
|
||||
USE_TRITON_W8A8_FP8_KERNEL = EnvBool(False)
|
||||
RETURN_ORIGINAL_LOGPROB = EnvBool(False)
|
||||
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN = EnvBool(False)
|
||||
SGLANG_MOE_PADDING = EnvBool(False)
|
||||
SGLANG_CUTLASS_MOE = EnvBool(False)
|
||||
HF_HUB_DISABLE_XET = EnvBool(False)
|
||||
DISABLE_OPENAPI_DOC = EnvBool(False)
|
||||
SGLANG_ENABLE_TORCH_INFERENCE_MODE = EnvBool(False)
|
||||
SGLANG_IS_FIRST_RANK_ON_NODE = EnvBool(True)
|
||||
SGLANG_SUPPORT_CUTLASS_BLOCK_FP8 = EnvBool(False)
|
||||
SGLANG_SYNC_TOKEN_IDS_ACROSS_TP = EnvBool(False)
|
||||
SGLANG_ENABLE_COLOCATED_BATCH_GEN = EnvBool(False)
|
||||
|
||||
# fmt: on
|
||||
|
||||
|
||||
envs = Envs()
|
||||
|
||||
|
||||
def _convert_SGL_to_SGLANG():
|
||||
for key, value in os.environ.items():
|
||||
if key.startswith("SGL_"):
|
||||
new_key = key.replace("SGL_", "SGLANG_", 1)
|
||||
warnings.warn(
|
||||
f"Environment variable {key} is deprecated, please use {new_key}"
|
||||
)
|
||||
os.environ[new_key] = value
|
||||
|
||||
|
||||
_convert_SGL_to_SGLANG()
|
||||
|
||||
|
||||
def example_with_exit_stack():
|
||||
# Use this style of context manager in unit test
|
||||
exit_stack = ExitStack()
|
||||
exit_stack.enter_context(envs.SGLANG_TEST_RETRACT.override(False))
|
||||
assert envs.SGLANG_TEST_RETRACT.value is False
|
||||
exit_stack.close()
|
||||
assert envs.SGLANG_TEST_RETRACT.value is None
|
||||
|
||||
|
||||
def example_with_subprocess():
|
||||
command = ["python", "-c", "import os; print(os.getenv('SGLANG_TEST_RETRACT'))"]
|
||||
with envs.SGLANG_TEST_RETRACT.override(True):
|
||||
process = subprocess.Popen(
|
||||
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
process.wait()
|
||||
output = process.stdout.read().decode("utf-8").strip()
|
||||
assert output == "True"
|
||||
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
output = process.stdout.read().decode("utf-8").strip()
|
||||
assert output == "None"
|
||||
|
||||
|
||||
def examples():
|
||||
# Example usage for envs
|
||||
envs.SGLANG_TEST_RETRACT.clear()
|
||||
assert envs.SGLANG_TEST_RETRACT.value is False
|
||||
|
||||
envs.SGLANG_TEST_RETRACT.set(None)
|
||||
assert envs.SGLANG_TEST_RETRACT.is_set() and envs.SGLANG_TEST_RETRACT.value is None
|
||||
|
||||
envs.SGLANG_TEST_RETRACT.clear()
|
||||
assert not envs.SGLANG_TEST_RETRACT.is_set()
|
||||
|
||||
envs.SGLANG_TEST_RETRACT.set(True)
|
||||
assert envs.SGLANG_TEST_RETRACT.value is True
|
||||
|
||||
with envs.SGLANG_TEST_RETRACT.override(None):
|
||||
assert (
|
||||
envs.SGLANG_TEST_RETRACT.is_set() and envs.SGLANG_TEST_RETRACT.value is None
|
||||
)
|
||||
|
||||
assert envs.SGLANG_TEST_RETRACT.value is True
|
||||
|
||||
envs.SGLANG_TEST_RETRACT.set(None)
|
||||
with envs.SGLANG_TEST_RETRACT.override(True):
|
||||
assert envs.SGLANG_TEST_RETRACT.value is True
|
||||
|
||||
assert envs.SGLANG_TEST_RETRACT.is_set() and envs.SGLANG_TEST_RETRACT.value is None
|
||||
|
||||
example_with_exit_stack()
|
||||
example_with_subprocess()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
examples()
|
||||
@@ -191,6 +191,7 @@ _warned_bool_env_var_keys = set()
|
||||
|
||||
|
||||
def get_bool_env_var(name: str, default: str = "false") -> bool:
|
||||
# FIXME: move your environment variable to sglang.environ
|
||||
value = os.getenv(name, default)
|
||||
value = value.lower()
|
||||
|
||||
@@ -208,6 +209,7 @@ def get_bool_env_var(name: str, default: str = "false") -> bool:
|
||||
|
||||
|
||||
def get_int_env_var(name: str, default: int = 0) -> int:
|
||||
# FIXME: move your environment variable to sglang.environ
|
||||
value = os.getenv(name)
|
||||
if value is None or not value.strip():
|
||||
return default
|
||||
|
||||
Reference in New Issue
Block a user