diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py
index 988cb2e0d..c24979ae8 100644
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -1193,6 +1193,7 @@ def v1_chat_generate_request(
         top_logprobs_nums = top_logprobs_nums[0]
         modalities_list = modalities_list[0]
         lora_paths = lora_paths[0]
+        request_ids = request_ids[0]
     else:
         if tokenizer_manager.model_config.is_multimodal:
             # processor will need text input
@@ -1429,7 +1430,9 @@ async def v1_chat_completions(
         return create_error_response("Invalid request body, error: ", str(e))
     all_requests = [ChatCompletionRequest(**request_json)]
     created = int(time.time())
-    adapted_request, request = v1_chat_generate_request(all_requests, tokenizer_manager)
+    adapted_request, request = v1_chat_generate_request(
+        all_requests, tokenizer_manager, request_ids=[all_requests[0].rid]
+    )
 
     if adapted_request.stream:
         parser_dict = {}
diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py
index c37442248..0cdab70cd 100644
--- a/python/sglang/srt/openai_api/protocol.py
+++ b/python/sglang/srt/openai_api/protocol.py
@@ -392,6 +392,9 @@ class ChatCompletionRequest(BaseModel):
     stream_reasoning: bool = True
     chat_template_kwargs: Optional[Dict] = None
 
+    # The request id.
+    rid: Optional[Union[List[str], str]] = None
+
     # For PD disaggregation
     bootstrap_host: Optional[str] = None
     bootstrap_port: Optional[int] = None