fix: make inference deterministic for large TP (#10930)

Co-authored-by: yhyang201 <yhyang201@gmail.com>
Co-authored-by: Yangmin Li <yangminl@nvidia.com>
Co-authored-by: Yuan Luo <yuan.luo@hotmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Xinyuan Tong
2025-09-27 02:46:45 -07:00
committed by GitHub
parent 8ebf72fef3
commit 62e2e99db6
2 changed files with 5 additions and 8 deletions

View File

@@ -1166,15 +1166,12 @@ class ServerArgs:
# Check TP size
if self.tp_size > 1:
    # Post-commit code: the former `raise ValueError("Currently only TP size 1 is
    # supported for deterministic inference.")` was removed; TP > 1 is now handled
    # by forcing a deterministic all-reduce algorithm instead of rejecting it.
    os.environ["NCCL_ALGO"] = "allreduce:tree"
    self.disable_custom_all_reduce = True
    logger.warning(
        "NCCL_ALGO is set to 'allreduce:tree' and custom all reduce is disabled for deterministic inference when TP size > 1."
    )
# Warnings on MoE models
logger.warning(
"Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models."
)
def _handle_other_validations(self):
pass

View File

@@ -19,7 +19,7 @@ from sglang.profiler import run_profile
PROMPT_1 = "Tell me about Richard Feynman: "
PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number."
dirpath = os.path.dirname(__file__)
with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f:
LONG_PROMPT = f.read()