From 0cb099e20a0b9ccd308fff5ef133a2e4b26a7f7a Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Thu, 15 Aug 2024 10:47:39 -0700 Subject: [PATCH] set CUDA_DEVICE_MAX_CONNECTIONS=1 (#1113) --- python/sglang/srt/server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index ae886796c..4f06f7630 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -360,6 +360,7 @@ def _set_envs_and_config(server_args: ServerArgs): os.environ["NCCL_CUMEM_ENABLE"] = "0" os.environ["NCCL_NVLS_ENABLE"] = "0" os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1" + os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1" # Set ulimit set_ulimit()