add response_format support for completion API (#9665)

This commit is contained in:
cicirori
2025-08-27 00:01:29 +02:00
committed by GitHub
parent 43de1d7304
commit b6c14ec0b4
3 changed files with 90 additions and 17 deletions

View File

@@ -23,6 +23,7 @@ from sglang.srt.entrypoints.openai.utils import (
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.utils import convert_json_schema_to_str
logger = logging.getLogger(__name__)
@@ -125,6 +126,20 @@ class OpenAIServingCompletion(OpenAIServingBase):
"logit_bias": request.logit_bias,
}
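# Map response_format onto sampling params: "json_schema" and "json_object"
# set sampling_params["json_schema"]; "structural_tag" sets sampling_params["structural_tag"].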
if request.response_format and request.response_format.type == "json_schema":
sampling_params["json_schema"] = convert_json_schema_to_str(
request.response_format.json_schema.schema_
)
elif request.response_format and request.response_format.type == "json_object":
sampling_params["json_schema"] = '{"type": "object"}'
elif (
request.response_format and request.response_format.type == "structural_tag"
):
sampling_params["structural_tag"] = convert_json_schema_to_str(
request.response_format.model_dump(by_alias=True)
)
return sampling_params
async def _handle_streaming_request(