[router][grpc] Fix warm-up random token ids for small models (#11887)

This commit is contained in:
Chang Su
2025-10-20 19:22:17 -07:00
committed by GitHub
parent 01f14a7ad2
commit 9c0b1eb5ad

View File

@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup(
max_new_tokens = 8 if is_generation else 1
if is_generation:
# Create tokenized input for warmup
warmup_request_kwargs = {
"request_id": f"WARMUP_{time.time()}",
"tokenized": sglang_scheduler_pb2.TokenizedInput(
input_ids=[
954,
15541,
2181,
23496,
1476,
64710,
280,
], # Simple token sequence
original_text="The capital city of France is",
123,
456,
789,
234,
567,
890,
345,
], # Random-looking but safe token IDs
original_text="warmup request",
),
"sampling_params": sglang_scheduler_pb2.SamplingParams(
temperature=0.0,