diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 19d17a3bb..e1b53e6f9 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1166,15 +1166,12 @@ class ServerArgs: # Check TP size if self.tp_size > 1: - raise ValueError( - "Currently only TP size 1 is supported for deterministic inference." + os.environ["NCCL_ALGO"] = "allreduce:tree" + self.disable_custom_all_reduce = True + logger.warning( + "NCCL_ALGO is set to 'allreduce:tree' and custom all reduce is disabled for deterministic inference when TP size > 1." ) - # Warnings on MoE models - logger.warning( - "Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models." - ) - def _handle_other_validations(self): pass diff --git a/python/sglang/test/test_deterministic.py b/python/sglang/test/test_deterministic.py index 8c4e45c7c..286902677 100644 --- a/python/sglang/test/test_deterministic.py +++ b/python/sglang/test/test_deterministic.py @@ -19,7 +19,7 @@ from sglang.profiler import run_profile PROMPT_1 = "Tell me about Richard Feynman: " PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number." dirpath = os.path.dirname(__file__) -with open("python/sglang/test/long_prompt.txt", "r") as f: +with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f: LONG_PROMPT = f.read()