From 9c0b1eb5adb5b37f6bc99658c042b7499fd5510d Mon Sep 17 00:00:00 2001 From: Chang Su Date: Mon, 20 Oct 2025 19:22:17 -0700 Subject: [PATCH] [router][grpc] Fix warm-up random token ids for small models (#11887) --- python/sglang/srt/entrypoints/grpc_server.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/python/sglang/srt/entrypoints/grpc_server.py b/python/sglang/srt/entrypoints/grpc_server.py index 7bc59dfb9..70fc9c7a3 100644 --- a/python/sglang/srt/entrypoints/grpc_server.py +++ b/python/sglang/srt/entrypoints/grpc_server.py @@ -998,20 +998,19 @@ def _execute_grpc_server_warmup( max_new_tokens = 8 if is_generation else 1 if is_generation: - # Create tokenized input for warmup warmup_request_kwargs = { "request_id": f"WARMUP_{time.time()}", "tokenized": sglang_scheduler_pb2.TokenizedInput( input_ids=[ - 954, - 15541, - 2181, - 23496, - 1476, - 64710, - 280, - ], # Simple token sequence - original_text="The capital city of France is", + 123, + 456, + 789, + 234, + 567, + 890, + 345, + ], # Random-looking but safe token IDs + original_text="warmup request", ), "sampling_params": sglang_scheduler_pb2.SamplingParams( temperature=0.0,