From d01b9214823a20d1f43f01fdcf332cfed783a9ea Mon Sep 17 00:00:00 2001 From: Alex Chi Z <4198311+skyzh@users.noreply.github.com> Date: Fri, 3 Oct 2025 23:41:46 -0400 Subject: [PATCH] fix sampling_seed handling when deterministic is enabled (#11096) Signed-off-by: Alex Chi --- python/sglang/srt/sampling/sampling_params.py | 3 +++ python/sglang/test/test_deterministic.py | 13 ++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py index 0bff4d397..d978e9587 100644 --- a/python/sglang/srt/sampling/sampling_params.py +++ b/python/sglang/srt/sampling/sampling_params.py @@ -142,6 +142,9 @@ class SamplingParams: f"logit_bias must has keys in [0, {vocab_size - 1}], got " f"{token_id}." ) + if self.sampling_seed is None: + raise ValueError("sampling_seed should not be None") + grammars = [ self.json_schema, self.regex, diff --git a/python/sglang/test/test_deterministic.py b/python/sglang/test/test_deterministic.py index aa6116043..3f56b6539 100644 --- a/python/sglang/test/test_deterministic.py +++ b/python/sglang/test/test_deterministic.py @@ -96,12 +96,15 @@ def send_single( "max_new_tokens": args.max_new_tokens, "frequency_penalty": args.frequency_penalty, "presence_penalty": args.presence_penalty, - "sampling_seed": args.sampling_seed, }, "return_logprob": args.return_logprob, "stream": args.stream, } + if args.sampling_seed is not None: + # sglang server cannot parse None value for sampling_seed + json_data["sampling_params"]["sampling_seed"] = args.sampling_seed + if profile: run_profile( base_url, profile_steps, ["CPU", "GPU"], None, None, profile_by_stage @@ -145,12 +148,14 @@ def send_mixed(args, batch_size: int): "max_new_tokens": args.max_new_tokens, "frequency_penalty": args.frequency_penalty, "presence_penalty": args.presence_penalty, - "sampling_seed": args.sampling_seed, }, "return_logprob": args.return_logprob, "stream": args.stream, } + if args.sampling_seed is not None: + json_data["sampling_params"]["sampling_seed"] = args.sampling_seed + response = requests.post( f"http://{args.host}:{args.port}/generate", json=json_data, @@ -192,12 +197,14 @@ def send_prefix(args, batch_size: int, prompts: List[str]): "max_new_tokens": args.max_new_tokens, "frequency_penalty": args.frequency_penalty, "presence_penalty": args.presence_penalty, - "sampling_seed": args.sampling_seed, }, "return_logprob": args.return_logprob, "stream": args.stream, } + if args.sampling_seed is not None: + json_data["sampling_params"]["sampling_seed"] = args.sampling_seed + response = requests.post( f"http://{args.host}:{args.port}/generate", json=json_data,