This commit is contained in:
Lianmin Zheng
2025-08-09 13:33:42 -07:00
committed by GitHub
parent 41d71ca488
commit 9a44b643c6
9 changed files with 24 additions and 20 deletions

View File

@@ -67,6 +67,7 @@ from sglang.srt.utils import (
MultiprocessingSerializer,
assert_pkg_version,
configure_logger,
get_bool_env_var,
get_zmq_socket,
is_cuda,
kill_process_tree,
@@ -627,7 +628,6 @@ def _set_envs_and_config(server_args: ServerArgs):
os.environ["NCCL_CUMEM_ENABLE"] = str(int(server_args.enable_symm_mem))
if not server_args.enable_symm_mem:
os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4"
os.environ["CUDA_MODULE_LOADING"] = "AUTO"
@@ -647,7 +647,7 @@ def _set_envs_and_config(server_args: ServerArgs):
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
)
-if _is_cuda:
+if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
assert_pkg_version(
"sgl-kernel",
"0.3.3",