276 lines
8.2 KiB
Python
276 lines
8.2 KiB
Python
import os
|
|
import subprocess
|
|
import warnings
|
|
from contextlib import ExitStack, contextmanager
|
|
from typing import Any
|
|
|
|
|
|
class EnvField:
    """Typed accessor for a single environment variable.

    Instances are meant to be declared as class attributes; the attribute
    name becomes the environment variable name through ``__set_name__``.
    Subclasses implement ``parse`` to convert the raw string into a value.
    """

    def __init__(self, default: Any):
        """Store the fallback returned when the variable is not set."""
        self.default = default
        # NOTE: we use None to indicate whether the value is set or not
        # If the value is manually set to None, we need mark it as _set_to_none.
        # Always use clear() to reset the value, which leads to the default fallback.
        self._set_to_none = False

    def __set_name__(self, owner, name):
        """Record the attribute name, which is used as the variable name."""
        self.name = name

    def parse(self, value: str) -> Any:
        """Convert the raw string ``value``; subclasses must override this.

        Implementations signal an invalid value by raising ``ValueError``.
        """
        raise NotImplementedError()

    def get(self) -> Any:
        """Return the current value of the field.

        Returns ``None`` if ``set(None)`` is in effect, ``self.default`` if
        the variable is absent, and the parsed value otherwise. When parsing
        raises ``ValueError`` a warning is emitted and the default is returned.
        """
        value = os.getenv(self.name)
        if self._set_to_none:
            # set(None) removes the variable, so it must still be absent here.
            assert value is None
            return None

        if value is None:
            return self.default

        try:
            return self.parse(value)
        except ValueError as e:
            warnings.warn(
                f'Invalid value for {self.name}: {e}, using default "{self.default}"'
            )
            return self.default

    def is_set(self):
        """Return whether the variable is present or was explicitly set to None."""
        # NOTE: If None is manually set, it is considered as set.
        return self.name in os.environ or self._set_to_none

    def get_set_value_or(self, or_value: Any):
        """Return ``get()`` if the field is set, otherwise ``or_value``."""
        # NOTE: Ugly usage, but only way to get custom default value.
        return self.get() if self.is_set() else or_value

    def set(self, value: Any):
        """Set the field to ``value``.

        ``None`` removes the variable from ``os.environ`` and flags the field
        as explicitly set to None; any other value is stored as ``str(value)``.
        """
        if value is None:
            self._set_to_none = True
            os.environ.pop(self.name, None)
        else:
            self._set_to_none = False
            os.environ[self.name] = str(value)

    @contextmanager
    def override(self, value: Any):
        """Temporarily set the field to ``value`` for the ``with`` body.

        The previous environment entry (or its absence) and the previous
        "set to None" flag are restored on exit, including when the body
        raises an exception.
        """
        backup_present = self.name in os.environ
        backup_value = os.environ.get(self.name)
        backup_set_to_none = self._set_to_none
        self.set(value)
        try:
            yield
        finally:
            # Restore unconditionally so a failing body cannot leak the override.
            if backup_present:
                os.environ[self.name] = backup_value
            else:
                os.environ.pop(self.name, None)
            self._set_to_none = backup_set_to_none

    def clear(self):
        """Remove the variable and the "set to None" flag, so get() falls back to the default."""
        os.environ.pop(self.name, None)
        self._set_to_none = False

    @property
    def value(self):
        """Property shorthand for ``get()``."""
        return self.get()
|
|
|
|
|
|
class EnvStr(EnvField):
    """Environment field whose value is the raw string, returned unchanged."""

    def parse(self, value: str) -> str:
        # No conversion is needed for strings.
        return value
|
|
|
|
|
|
class EnvBool(EnvField):
    """Environment field parsed as a boolean, case-insensitively."""

    def parse(self, value: str) -> bool:
        """Map true/1/yes/y to True and false/0/no/n to False.

        Raises:
            ValueError: if the lower-cased text is not one of the accepted words.
        """
        lowered = value.lower()
        truthy = ("true", "1", "yes", "y")
        falsy = ("false", "0", "no", "n")
        if lowered in truthy:
            return True
        if lowered not in falsy:
            # The message reports the lower-cased text, not the raw input.
            raise ValueError(f'"{lowered}" is not a valid boolean value')
        return False
|
|
|
|
|
|
class EnvInt(EnvField):
    """Environment field parsed with ``int()``."""

    def parse(self, value: str) -> int:
        """Return ``int(value)``.

        Raises:
            ValueError: with a message quoting ``value`` when it is not a
                valid integer; the original error is attached as the cause.
        """
        try:
            return int(value)
        except ValueError as err:
            # Chain explicitly so the traceback shows the underlying error as the cause.
            raise ValueError(f'"{value}" is not a valid integer value') from err
|
|
|
|
|
|
class EnvFloat(EnvField):
    """Environment field parsed with ``float()``."""

    def parse(self, value: str) -> float:
        """Return ``float(value)``.

        Raises:
            ValueError: with a message quoting ``value`` when it is not a
                valid float; the original error is attached as the cause.
        """
        try:
            return float(value)
        except ValueError as err:
            # Chain explicitly so the traceback shows the underlying error as the cause.
            raise ValueError(f'"{value}" is not a valid float value') from err
|
|
|
|
|
|
class Envs:
    """Declarative registry of the environment variables read by this module.

    Each attribute is an EnvField subclass instance constructed with its
    default value. The attribute name is also the environment variable name,
    because EnvField.__set_name__ stores it as the field's ``name``.
    """

    # fmt: off

    # Model & File Download
    SGLANG_USE_MODELSCOPE = EnvBool(False)

    # Test & Debug
    SGLANG_IS_IN_CI = EnvBool(False)
    SGLANG_AMD_CI = EnvBool(False)
    SGLANG_TEST_RETRACT = EnvBool(False)
    SGLANG_SET_CPU_AFFINITY = EnvBool(False)
    SGLANG_PROFILE_WITH_STACK = EnvBool(True)
    SGLANG_RECORD_STEP_TIME = EnvBool(False)
    SGLANG_GC_LOG = EnvBool(False)
    SGLANG_FORCE_SHUTDOWN = EnvBool(False)
    SGLANG_DEBUG_MEMORY_POOL = EnvBool(False)
    SGLANG_TEST_REQUEST_TIME_STATS = EnvBool(False)
    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK = EnvBool(False)
    SGLANG_DISABLE_REQUEST_LOGGING = EnvBool(False)

    # Model Parallel
    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER = EnvBool(True)

    # Constrained Decoding
    SGLANG_DISABLE_OUTLINES_DISK_CACHE = EnvBool(True)
    SGLANG_GRAMMAR_TIMEOUT = EnvFloat(300)

    # Hi-Cache
    SGLANG_HICACHE_HF3FS_CONFIG_PATH = EnvStr(None)

    # Mooncake KV Transfer
    SGLANG_MOONCAKE_CUSTOM_MEM_POOL = EnvBool(False)
    ENABLE_ASCEND_TRANSFER_WITH_MOONCAKE = EnvBool(False)

    # AMD & ROCm
    SGLANG_USE_AITER = EnvBool(False)
    SGLANG_ROCM_FUSED_DECODE_MLA = EnvBool(False)

    # Quantization
    SGLANG_INT4_WEIGHT = EnvBool(False)
    SGLANG_CPU_QUANTIZATION = EnvBool(False)
    SGLANG_USE_DYNAMIC_MXFP4_LINEAR = EnvBool(False)
    SGLANG_FORCE_FP8_MARLIN = EnvBool(False)

    # Flashinfer
    SGLANG_IS_FLASHINFER_AVAILABLE = EnvBool(True)
    SGLANG_ENABLE_FLASHINFER_GEMM = EnvBool(False)

    # Triton
    SGLANG_TRITON_DECODE_ATTN_STATIC_KV_SPLITS = EnvBool(False)

    # Torch Compile
    SGLANG_ENABLE_TORCH_COMPILE = EnvBool(False)

    # EPLB
    SGLANG_EXPERT_LOCATION_UPDATER_LOG_INPUT = EnvBool(False)
    SGLANG_EXPERT_LOCATION_UPDATER_CANARY = EnvBool(False)
    SGLANG_EXPERT_LOCATION_UPDATER_LOG_METRICS = EnvBool(False)
    SGLANG_LOG_EXPERT_LOCATION_METADATA = EnvBool(False)

    # TBO
    SGLANG_TBO_DEBUG = EnvBool(False)

    # DeepGemm
    SGLANG_ENABLE_JIT_DEEPGEMM = EnvBool(True)
    SGLANG_JIT_DEEPGEMM_PRECOMPILE = EnvBool(True)
    SGLANG_JIT_DEEPGEMM_COMPILE_WORKERS = EnvInt(4)
    SGLANG_IN_DEEPGEMM_PRECOMPILE_STAGE = EnvBool(False)
    SGLANG_DG_CACHE_DIR = EnvStr(os.path.expanduser("~/.cache/deep_gemm"))
    SGLANG_DG_USE_NVRTC = EnvBool(False)
    SGLANG_USE_DEEPGEMM_BMM = EnvBool(False)

    # sgl-kernel
    SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK = EnvBool(False)

    # vLLM dependencies
    USE_VLLM_CUSTOM_ALLREDUCE = EnvBool(False)
    USE_VLLM_CUTLASS_W8A8_FP8_KERNEL = EnvBool(False)

    # Miscellaneous (this group carries no section header of its own)
    USE_TRITON_W8A8_FP8_KERNEL = EnvBool(False)
    RETURN_ORIGINAL_LOGPROB = EnvBool(False)
    SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN = EnvBool(False)
    SGLANG_MOE_PADDING = EnvBool(False)
    SGLANG_CUTLASS_MOE = EnvBool(False)
    HF_HUB_DISABLE_XET = EnvBool(False)
    DISABLE_OPENAPI_DOC = EnvBool(False)
    SGLANG_ENABLE_TORCH_INFERENCE_MODE = EnvBool(False)
    SGLANG_IS_FIRST_RANK_ON_NODE = EnvBool(True)
    SGLANG_SUPPORT_CUTLASS_BLOCK_FP8 = EnvBool(False)
    SGLANG_SYNC_TOKEN_IDS_ACROSS_TP = EnvBool(False)
    SGLANG_ENABLE_COLOCATED_BATCH_GEN = EnvBool(False)

    # fmt: on
|
|
|
|
|
|
# Module-level instance through which the fields are accessed,
# e.g. ``envs.SGLANG_TEST_RETRACT.value``.
envs = Envs()
|
|
|
|
|
|
def _convert_SGL_to_SGLANG():
    """Mirror deprecated ``SGL_*`` environment variables to ``SGLANG_*`` names.

    For every variable whose name starts with ``SGL_`` a deprecation warning
    is emitted and its value is written under the ``SGLANG_``-prefixed name,
    overwriting any value already stored there. The deprecated variable
    itself is left in place.
    """
    old_prefix = "SGL_"
    new_prefix = "SGLANG_"
    # Iterate over a snapshot: the loop body inserts keys into os.environ,
    # and mutating a mapping while walking its live view is not safe.
    for key, value in list(os.environ.items()):
        if not key.startswith(old_prefix):
            continue
        new_key = new_prefix + key[len(old_prefix):]
        warnings.warn(
            f"Environment variable {key} is deprecated, please use {new_key}"
        )
        os.environ[new_key] = value
|
|
|
|
|
|
# Runs at import time, so SGL_*-prefixed variables present in the environment
# are copied to their SGLANG_* names as soon as this module is loaded.
_convert_SGL_to_SGLANG()
|
|
|
|
|
|
def example_with_exit_stack():
    """Show how to drive ``override`` through an ``ExitStack``.

    The override is entered explicitly and undone by ``close()``, and the
    field must then report the same value it had before the override. The
    value is captured up front so the example does not depend on the state
    left behind by whoever called it.
    """
    # Use this style of context manager in unit test
    field = envs.SGLANG_TEST_RETRACT
    previous = field.value

    exit_stack = ExitStack()
    try:
        exit_stack.enter_context(field.override(False))
        assert field.value is False
    finally:
        # Always undo the override, even if the assertion above fails.
        exit_stack.close()

    # EnvBool values are True/False/None, so an identity check is sufficient.
    assert field.value is previous
|
|
|
|
|
|
def example_with_subprocess():
    """Show that an override is visible to child processes and undone afterwards.

    A child Python process prints its own view of ``SGLANG_TEST_RETRACT``:
    it must see ``"True"`` inside the override and ``"None"`` (variable
    absent) once the override has been left.
    """
    command = ["python", "-c", "import os; print(os.getenv('SGLANG_TEST_RETRACT'))"]

    def read_child_value() -> str:
        # subprocess.run drains both pipes and waits for the child, which
        # avoids the wait()-before-read deadlock and never leaves the child
        # unreaped.
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        return completed.stdout.decode("utf-8").strip()

    with envs.SGLANG_TEST_RETRACT.override(True):
        assert read_child_value() == "True"

    assert read_child_value() == "None"
|
|
|
|
|
|
def examples():
    """Walk through the EnvField API using ``envs.SGLANG_TEST_RETRACT``.

    The steps are order-dependent: each assertion relies on the state left
    by the previous step, and the two helper examples at the end run with
    the field explicitly set to None.
    """
    # Example usage for envs
    flag = envs.SGLANG_TEST_RETRACT

    # After clear() the field falls back to its default (False).
    flag.clear()
    assert flag.value is False

    # An explicit None counts as "set" and is returned as-is.
    flag.set(None)
    assert flag.is_set()
    assert flag.value is None

    # clear() also drops the explicit-None marker.
    flag.clear()
    assert not flag.is_set()

    flag.set(True)
    assert flag.value is True

    # Overriding with None inside the block, then back to True afterwards.
    with flag.override(None):
        assert flag.is_set()
        assert flag.value is None

    assert flag.value is True

    # Overriding an explicit None with True, then back to None afterwards.
    flag.set(None)
    with flag.override(True):
        assert flag.value is True

    assert flag.is_set()
    assert flag.value is None

    example_with_exit_stack()
    example_with_subprocess()
|
|
|
|
|
|
# Run the usage examples when this file is executed as a script.
if __name__ == "__main__":
    examples()
|