[router][grpc] Fix warm-up random token ids for small models (#11887)
This commit is contained in:
@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup(
|
|||||||
max_new_tokens = 8 if is_generation else 1
|
max_new_tokens = 8 if is_generation else 1
|
||||||
|
|
||||||
if is_generation:
|
if is_generation:
|
||||||
# Create tokenized input for warmup
|
|
||||||
warmup_request_kwargs = {
|
warmup_request_kwargs = {
|
||||||
"request_id": f"WARMUP_{time.time()}",
|
"request_id": f"WARMUP_{time.time()}",
|
||||||
"tokenized": sglang_scheduler_pb2.TokenizedInput(
|
"tokenized": sglang_scheduler_pb2.TokenizedInput(
|
||||||
input_ids=[
|
input_ids=[
|
||||||
954,
|
123,
|
||||||
15541,
|
456,
|
||||||
2181,
|
789,
|
||||||
23496,
|
234,
|
||||||
1476,
|
567,
|
||||||
64710,
|
890,
|
||||||
280,
|
345,
|
||||||
], # Simple token sequence
|
], # Random-looking but safe token IDs
|
||||||
original_text="The capital city of France is",
|
original_text="warmup request",
|
||||||
),
|
),
|
||||||
"sampling_params": sglang_scheduler_pb2.SamplingParams(
|
"sampling_params": sglang_scheduler_pb2.SamplingParams(
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
|
|||||||
Reference in New Issue
Block a user