Temporary fix OpenAI API for Pydantic v1/v2 (#153)
@@ -13,11 +13,13 @@ setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 import aiohttp
 import psutil
+import pydantic
 import requests
 import uvicorn
 import uvloop
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import Response, StreamingResponse
+from pydantic import BaseModel
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.srt.conversation import (
     Conversation,
@@ -57,6 +59,15 @@ tokenizer_manager = None
 chat_template_name = None
 
 
+# FIXME: Remove this once we drop support for pydantic 1.x
+IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1
+
+
+def jsonify_pydantic_model(obj: BaseModel):
+    if IS_PYDANTIC_1:
+        return obj.json(ensure_ascii=False)
+    return obj.model_dump_json()
+
+
 @app.get("/health")
 async def health() -> Response:
     """Health check."""
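
The new helper dispatches on pydantic's major version once at import time: v1 models serialize with .json(), while v2 renamed that method to .model_dump_json() (which emits UTF-8 without ASCII escaping, so the ensure_ascii=False flag has no v2 counterpart). A minimal standalone sketch of the same dispatch; the Message model is illustrative, not part of the commit:

import pydantic
from pydantic import BaseModel

IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1


class Message(BaseModel):  # hypothetical model for demonstration
    role: str
    content: str


def jsonify_pydantic_model(obj: BaseModel) -> str:
    # v1: .json(); v2: .model_dump_json(). Non-ASCII characters are
    # preserved unescaped in both branches.
    if IS_PYDANTIC_1:
        return obj.json(ensure_ascii=False)
    return obj.model_dump_json()


print(jsonify_pydantic_model(Message(role="user", content="héllo")))
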
@@ -75,7 +86,8 @@ async def get_model_info():
 async def flush_cache():
     await tokenizer_manager.flush_cache()
     return Response(
-        content="Cache flushed.\nPlease check backend logs for more details. (When there are running or waiting requests, the operation will not be performed.)\n",
+        content="Cache flushed.\nPlease check backend logs for more details. "
+        "(When there are running or waiting requests, the operation will not be performed.)\n",
         status_code=200,
     )
 
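
The rewrapped flush_cache message relies on Python's implicit concatenation of adjacent string literals, which the compiler joins into a single string at compile time, so the response body is byte-for-byte unchanged:

# Adjacent literals form one string; no runtime concatenation occurs.
msg = ("Cache flushed.\nPlease check backend logs for more details. "
       "(When there are running or waiting requests, the operation will not be performed.)\n")
assert "more details. (When there are" in msg
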
@@ -152,7 +164,7 @@ async def v1_completions(raw_request: Request):
                 total_tokens=prompt_tokens + completion_tokens,
             ),
         )
-        yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+        yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
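
Each yield here is one Server-Sent Events frame: a "data: " prefix, a JSON payload, and a blank line terminating the frame, with a literal [DONE] sentinel closing the stream, as in the OpenAI API. A minimal sketch of the same framing, assuming FastAPI; the /demo route and token payload are made up:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


@app.get("/demo")  # hypothetical route, not part of the commit
async def demo():
    async def gen():
        for token in ["Hello", ",", " world"]:
            # One SSE frame: "data: <payload>\n\n" (blank line ends the frame).
            yield f'data: {{"token": "{token}"}}\n\n'
        yield "data: [DONE]\n\n"  # OpenAI-style end-of-stream sentinel

    return StreamingResponse(gen(), media_type="text/event-stream")
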
@@ -262,7 +274,7 @@ async def v1_chat_completions(raw_request: Request):
                 choices=[choice_data],
                 model=request.model,
             )
-            yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+            yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
 
             text = content["text"]
             delta = text[len(stream_buffer) :]
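
The delta = text[len(stream_buffer):] line in the surrounding code converts cumulative output to incremental output: the backend reports the full text generated so far on every event, and only the unseen suffix is sent to the client. A standalone sketch with a made-up event sequence:

events = ["Hel", "Hello", "Hello, wor", "Hello, world"]  # hypothetical backend snapshots

stream_buffer = ""
for text in events:
    delta = text[len(stream_buffer):]  # suffix not yet sent to the client
    stream_buffer = text
    print(repr(delta))  # prints 'Hel', 'lo', ', wor', 'ld'
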
@@ -275,7 +287,7 @@ async def v1_chat_completions(raw_request: Request):
                 choices=[choice_data],
                 model=request.model,
             )
-            yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+            yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")