From 6412c5e49380dfbfdb59ecc6771498c26a3d16c2 Mon Sep 17 00:00:00 2001
From: Conghui Tan
Date: Thu, 13 Mar 2025 12:38:17 +0800
Subject: [PATCH] Avoid duplicated request ids in batch APIs (#4026)

Co-authored-by: conghuitan
---
 python/sglang/srt/openai_api/adapter.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py
index 43c3625bb..a8c6167a9 100644
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -284,11 +284,11 @@ async def process_batch(tokenizer_manager, batch_id: str, batch_request: BatchRe
         file_request_list = []
         all_requests = []
         request_ids = []
-        for line in lines:
+        for line_id, line in enumerate(lines):
             request_data = json.loads(line)
             file_request_list.append(request_data)
             body = request_data["body"]
-            request_ids.append(request_data["custom_id"])
+            request_ids.append(f"{batch_id}-req_{line_id}")
 
         # Although streaming is supported for standalone completions, it is not supported in
         # batch mode (multiple completions in single request).
@@ -438,15 +438,9 @@ async def cancel_batch(tokenizer_manager, batch_id: str, input_file_id: str):
         with open(input_file_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
 
-        file_request_list = []
-        request_ids = []
-        for line in lines:
-            request_data = json.loads(line)
-            file_request_list.append(request_data)
-            request_ids.append(request_data["custom_id"])
-
         # Cancel requests by request_ids
-        for rid in request_ids:
+        for line_id in range(len(lines)):
+            rid = f"{batch_id}-req_{line_id}"
             tokenizer_manager.abort_request(rid=rid)
 
         retrieve_batch = batch_storage[batch_id]