fix: make inference deterministic for large TP (#10930)

Co-authored-by: yhyang201 <yhyang201@gmail.com>
Co-authored-by: Yangmin Li <yangminl@nvidia.com>
Co-authored-by: Yuan Luo <yuan.luo@hotmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Xinyuan Tong
2025-09-27 02:46:45 -07:00
committed by GitHub
parent 8ebf72fef3
commit 62e2e99db6
2 changed files with 5 additions and 8 deletions

View File

@@ -1166,15 +1166,12 @@ class ServerArgs:
# Check TP size
if self.tp_size > 1:
    # Post-commit code: the former `raise ValueError("Currently only TP size 1 is
    # supported for deterministic inference.")` was removed; TP > 1 is now handled
    # by forcing a deterministic all-reduce algorithm instead of rejecting it.
    os.environ["NCCL_ALGO"] = "allreduce:tree"
    self.disable_custom_all_reduce = True
    logger.warning(
        "NCCL_ALGO is set to 'allreduce:tree' and custom all reduce is disabled for deterministic inference when TP size > 1."
    )
# Warnings on MoE models
logger.warning(
"Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models."
)
def _handle_other_validations(self):
pass

View File

@@ -19,7 +19,7 @@ from sglang.profiler import run_profile
PROMPT_1 = "Tell me about Richard Feynman: "
PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number."
dirpath = os.path.dirname(__file__)
with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f:
LONG_PROMPT = f.read()