Temporary fix: OpenAI API compatibility for Pydantic v1/v2 (#153)

This commit is contained in:
Cody Yu
2024-02-06 11:34:15 -08:00
committed by GitHub
parent e2bf732bc3
commit ccbe1e67d8

View File

@@ -13,11 +13,13 @@ setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
import aiohttp import aiohttp
import psutil import psutil
import pydantic
import requests import requests
import uvicorn import uvicorn
import uvloop import uvloop
from fastapi import FastAPI, HTTPException, Request from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import Response, StreamingResponse from fastapi.responses import Response, StreamingResponse
from pydantic import BaseModel
from sglang.backend.runtime_endpoint import RuntimeEndpoint from sglang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.srt.conversation import ( from sglang.srt.conversation import (
Conversation, Conversation,
@@ -57,6 +59,15 @@ tokenizer_manager = None
chat_template_name = None chat_template_name = None
# FIXME: Remove this once we drop support for pydantic 1.x
IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1
def jsonify_pydantic_model(obj: BaseModel) -> str:
    """Serialize a Pydantic model to a JSON string under Pydantic v1 or v2.

    Pydantic v2 models expose ``model_dump_json()``; v1 models only have
    ``.json()``.  Dispatching via feature detection on the object is more
    robust than parsing ``pydantic.VERSION`` as a string at import time
    (which breaks on pre-release/dev version strings).

    Args:
        obj: The Pydantic model instance to serialize.

    Returns:
        The model serialized as a JSON string.  Non-ASCII characters are
        emitted verbatim rather than escaped.
    """
    # Prefer the v2 API when the object provides it.
    if hasattr(obj, "model_dump_json"):
        return obj.model_dump_json()
    # Pydantic v1 fallback; ensure_ascii=False keeps unicode text readable,
    # matching v2's default output.
    return obj.json(ensure_ascii=False)
@app.get("/health") @app.get("/health")
async def health() -> Response: async def health() -> Response:
"""Health check.""" """Health check."""
@@ -75,7 +86,8 @@ async def get_model_info():
async def flush_cache(): async def flush_cache():
await tokenizer_manager.flush_cache() await tokenizer_manager.flush_cache()
return Response( return Response(
content="Cache flushed.\nPlease check backend logs for more details. (When there are running or waiting requests, the operation will not be performed.)\n", content="Cache flushed.\nPlease check backend logs for more details. "
"(When there are running or waiting requests, the operation will not be performed.)\n",
status_code=200, status_code=200,
) )
@@ -152,7 +164,7 @@ async def v1_completions(raw_request: Request):
total_tokens=prompt_tokens + completion_tokens, total_tokens=prompt_tokens + completion_tokens,
), ),
) )
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
yield "data: [DONE]\n\n" yield "data: [DONE]\n\n"
return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream") return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
@@ -262,7 +274,7 @@ async def v1_chat_completions(raw_request: Request):
choices=[choice_data], choices=[choice_data],
model=request.model, model=request.model,
) )
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
text = content["text"] text = content["text"]
delta = text[len(stream_buffer) :] delta = text[len(stream_buffer) :]
@@ -275,7 +287,7 @@ async def v1_chat_completions(raw_request: Request):
choices=[choice_data], choices=[choice_data],
model=request.model, model=request.model,
) )
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
yield "data: [DONE]\n\n" yield "data: [DONE]\n\n"
return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream") return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")