[router][grpc] Fix warm-up random token ids for small models (#11887)

2025-10-20 19:22:17 -07:00
parent 01f14a7ad2
commit 9c0b1eb5ad
1 changed files with 9 additions and 10 deletions
--- a/python/sglang/srt/entrypoints/grpc_server.py
+++ b/python/sglang/srt/entrypoints/grpc_server.py
@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup(
        max_new_tokens = 8 if is_generation else 1

        if is_generation:
-            # Create tokenized input for warmup
            warmup_request_kwargs = {
                "request_id": f"WARMUP_{time.time()}",
                "tokenized": sglang_scheduler_pb2.TokenizedInput(
                    input_ids=[
-                        954,
-                        15541,
-                        2181,
-                        23496,
-                        1476,
-                        64710,
-                        280,
-                    ],  # Simple token sequence
-                    original_text="The capital city of France is",
+                        123,
+                        456,
+                        789,
+                        234,
+                        567,
+                        890,
+                        345,
+                    ],  # Random-looking but safe token IDs
+                    original_text="warmup request",
                ),
                "sampling_params": sglang_scheduler_pb2.SamplingParams(
                    temperature=0.0,