Refine OpenAI serving entrypoint to remove batch requests (#7372)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Chang Su <csu272@usc.edu>
This commit is contained in:
Xinyuan Tong
2025-06-20 14:33:43 -07:00
committed by GitHub
parent 794be55af2
commit 0998808009
8 changed files with 488 additions and 645 deletions

View File

@@ -20,7 +20,7 @@ import logging
import os
from enum import auto
from sglang.srt.openai_api.protocol import ChatCompletionRequest
from sglang.srt.entrypoints.openai.protocol import CompletionRequest
logger = logging.getLogger(__name__)
completion_template_name = None
@@ -116,7 +116,7 @@ def is_completion_template_defined() -> bool:
return completion_template_name is not None
def generate_completion_prompt_from_request(request: ChatCompletionRequest) -> str:
def generate_completion_prompt_from_request(request: CompletionRequest) -> str:
global completion_template_name
if request.suffix == "":
return request.prompt