[router][grpc] Fix warm-up random token ids for small models (#11887)

This commit is contained in:
Chang Su
2025-10-20 19:22:17 -07:00
committed by GitHub
parent 01f14a7ad2
commit 9c0b1eb5ad

View File

@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup(
max_new_tokens = 8 if is_generation else 1 max_new_tokens = 8 if is_generation else 1
if is_generation: if is_generation:
# Create tokenized input for warmup
warmup_request_kwargs = { warmup_request_kwargs = {
"request_id": f"WARMUP_{time.time()}", "request_id": f"WARMUP_{time.time()}",
"tokenized": sglang_scheduler_pb2.TokenizedInput( "tokenized": sglang_scheduler_pb2.TokenizedInput(
input_ids=[ input_ids=[
954, 123,
15541, 456,
2181, 789,
23496, 234,
1476, 567,
64710, 890,
280, 345,
], # Simple token sequence ], # Random-looking but safe token IDs
original_text="The capital city of France is", original_text="warmup request",
), ),
"sampling_params": sglang_scheduler_pb2.SamplingParams( "sampling_params": sglang_scheduler_pb2.SamplingParams(
temperature=0.0, temperature=0.0,