From 6412c5e49380dfbfdb59ecc6771498c26a3d16c2 Mon Sep 17 00:00:00 2001
From: Conghui Tan
Date: Thu, 13 Mar 2025 12:38:17 +0800
Subject: [PATCH] Avoid duplicated request ids in batch APIs (#4026)

Co-authored-by: conghuitan
---
 python/sglang/srt/openai_api/adapter.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py
index 43c3625bb..a8c6167a9 100644
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -284,11 +284,11 @@ async def process_batch(tokenizer_manager, batch_id: str, batch_request: BatchRe
         file_request_list = []
         all_requests = []
         request_ids = []
-        for line in lines:
+        for line_id, line in enumerate(lines):
             request_data = json.loads(line)
             file_request_list.append(request_data)
             body = request_data["body"]
-            request_ids.append(request_data["custom_id"])
+            request_ids.append(f"{batch_id}-req_{line_id}")
 
         # Although streaming is supported for standalone completions, it is not supported in
         # batch mode (multiple completions in single request).
@@ -438,15 +438,9 @@ async def cancel_batch(tokenizer_manager, batch_id: str, input_file_id: str):
         with open(input_file_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
 
-        file_request_list = []
-        request_ids = []
-        for line in lines:
-            request_data = json.loads(line)
-            file_request_list.append(request_data)
-            request_ids.append(request_data["custom_id"])
-
         # Cancel requests by request_ids
-        for rid in request_ids:
+        for line_id in range(len(lines)):
+            rid = f"{batch_id}-req_{line_id}"
             tokenizer_manager.abort_request(rid=rid)
 
         retrieve_batch = batch_storage[batch_id]