fix: make inference deterministic for large TP (#10930)
Co-authored-by: yhyang201 <yhyang201@gmail.com>
Co-authored-by: Yangmin Li <yangminl@nvidia.com>
Co-authored-by: Yuan Luo <yuan.luo@hotmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -1166,15 +1166,12 @@ class ServerArgs:
|
|||||||
|
|
||||||
# Check TP size
|
# Check TP size
|
||||||
if self.tp_size > 1:
|
if self.tp_size > 1:
|
||||||
raise ValueError(
|
os.environ["NCCL_ALGO"] = "allreduce:tree"
|
||||||
"Currently only TP size 1 is supported for deterministic inference."
|
self.disable_custom_all_reduce = True
|
||||||
|
logger.warning(
|
||||||
|
"NCCL_ALGO is set to 'allreduce:tree' and custom all reduce is disabled for deterministic inference when TP size > 1."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Warnings on MoE models
|
|
||||||
logger.warning(
|
|
||||||
"Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models."
|
|
||||||
)
|
|
||||||
|
|
||||||
def _handle_other_validations(self):
|
def _handle_other_validations(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ from sglang.profiler import run_profile
|
|||||||
PROMPT_1 = "Tell me about Richard Feynman: "
|
PROMPT_1 = "Tell me about Richard Feynman: "
|
||||||
PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number."
|
PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number."
|
||||||
dirpath = os.path.dirname(__file__)
|
dirpath = os.path.dirname(__file__)
|
||||||
with open("python/sglang/test/long_prompt.txt", "r") as f:
|
with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f:
|
||||||
LONG_PROMPT = f.read()
|
LONG_PROMPT = f.read()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user