From d01b9214823a20d1f43f01fdcf332cfed783a9ea Mon Sep 17 00:00:00 2001
From: Alex Chi Z <4198311+skyzh@users.noreply.github.com>
Date: Fri, 3 Oct 2025 23:41:46 -0400
Subject: [PATCH] fix sampling_seed handling when deterministic is enabled
 (#11096)

Signed-off-by: Alex Chi <iskyzh@gmail.com>
---
 python/sglang/srt/sampling/sampling_params.py |  3 +++
 python/sglang/test/test_deterministic.py      | 13 ++++++++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py
index 0bff4d397..d978e9587 100644
--- a/python/sglang/srt/sampling/sampling_params.py
+++ b/python/sglang/srt/sampling/sampling_params.py
@@ -142,6 +142,9 @@ class SamplingParams:
                         f"logit_bias must has keys in [0, {vocab_size - 1}], got "
                         f"{token_id}."
                     )
+        if self.sampling_seed is None:
+            raise ValueError("sampling_seed should not be None")
+
         grammars = [
             self.json_schema,
             self.regex,
diff --git a/python/sglang/test/test_deterministic.py b/python/sglang/test/test_deterministic.py
index aa6116043..3f56b6539 100644
--- a/python/sglang/test/test_deterministic.py
+++ b/python/sglang/test/test_deterministic.py
@@ -96,12 +96,15 @@ def send_single(
             "max_new_tokens": args.max_new_tokens,
             "frequency_penalty": args.frequency_penalty,
             "presence_penalty": args.presence_penalty,
-            "sampling_seed": args.sampling_seed,
         },
         "return_logprob": args.return_logprob,
         "stream": args.stream,
     }
 
+    if args.sampling_seed is not None:
+        # Omit the key when unset: the sglang server rejects a None sampling_seed
+        json_data["sampling_params"]["sampling_seed"] = args.sampling_seed
+
     if profile:
         run_profile(
             base_url, profile_steps, ["CPU", "GPU"], None, None, profile_by_stage
@@ -145,12 +148,14 @@ def send_mixed(args, batch_size: int):
             "max_new_tokens": args.max_new_tokens,
             "frequency_penalty": args.frequency_penalty,
             "presence_penalty": args.presence_penalty,
-            "sampling_seed": args.sampling_seed,
         },
         "return_logprob": args.return_logprob,
         "stream": args.stream,
     }
 
+    if args.sampling_seed is not None:
+        json_data["sampling_params"]["sampling_seed"] = args.sampling_seed
+
     response = requests.post(
         f"http://{args.host}:{args.port}/generate",
         json=json_data,
@@ -192,12 +197,14 @@ def send_prefix(args, batch_size: int, prompts: List[str]):
             "max_new_tokens": args.max_new_tokens,
             "frequency_penalty": args.frequency_penalty,
             "presence_penalty": args.presence_penalty,
-            "sampling_seed": args.sampling_seed,
         },
         "return_logprob": args.return_logprob,
         "stream": args.stream,
     }
 
+    if args.sampling_seed is not None:
+        json_data["sampling_params"]["sampling_seed"] = args.sampling_seed
+
     response = requests.post(
         f"http://{args.host}:{args.port}/generate",
         json=json_data,