From 6b7038babd562de099b583957ff19b78c4689a37 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Tue, 25 Mar 2025 12:08:05 +0800 Subject: [PATCH] Speedup warmup when DP > 1 (#4695) --- python/sglang/srt/entrypoints/http_server.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py index 5a97072de..62b151162 100644 --- a/python/sglang/srt/entrypoints/http_server.py +++ b/python/sglang/srt/entrypoints/http_server.py @@ -730,9 +730,9 @@ def _wait_and_warmup( }, } if server_args.skip_tokenizer_init: - json_data["input_ids"] = [10, 11, 12] + json_data["input_ids"] = [[10, 11, 12] for _ in range(server_args.dp_size)] else: - json_data["text"] = "The capital city of France is" + json_data["text"] = ["The capital city of France is"] * server_args.dp_size # Debug dumping if server_args.debug_tensor_dump_input_file: @@ -743,14 +743,13 @@ def _wait_and_warmup( json_data["sampling_params"]["max_new_tokens"] = 0 try: - for i in range(server_args.dp_size): - res = requests.post( - url + request_name, - json=json_data, - headers=headers, - timeout=600, - ) - assert res.status_code == 200, f"{res}" + res = requests.post( + url + request_name, + json=json_data, + headers=headers, + timeout=600, + ) + assert res.status_code == 200, f"{res}" except Exception: last_traceback = get_exception_traceback() if pipe_finish_writer is not None: