ORJson. Faster Json serialization (#1694)
This commit is contained in:
@@ -28,7 +28,9 @@ import os
|
||||
import threading
|
||||
import time
|
||||
from http import HTTPStatus
|
||||
from typing import Dict, List, Optional, Union
|
||||
from typing import AsyncIterator, Dict, List, Optional, Union
|
||||
|
||||
import orjson
|
||||
|
||||
# Fix a bug of Python threading
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
@@ -192,14 +194,18 @@ async def generate_request(obj: GenerateReqInput, request: Request):
|
||||
"""Handle a generate request."""
|
||||
if obj.stream:
|
||||
|
||||
async def stream_results():
|
||||
async def stream_results() -> AsyncIterator[bytes]:
|
||||
try:
|
||||
async for out in tokenizer_manager.generate_request(obj, request):
|
||||
yield f"data: {json.dumps(out, ensure_ascii=False)}\n\n"
|
||||
yield b"data: " + orjson.dumps(
|
||||
out, option=orjson.OPT_NON_STR_KEYS
|
||||
) + b"\n\n"
|
||||
except ValueError as e:
|
||||
out = {"error": {"message": str(e)}}
|
||||
yield f"data: {json.dumps(out, ensure_ascii=False)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
yield b"data: " + orjson.dumps(
|
||||
out, option=orjson.OPT_NON_STR_KEYS
|
||||
) + b"\n\n"
|
||||
yield b"data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
stream_results(),
|
||||
@@ -260,13 +266,13 @@ async def openai_v1_chat_completions(raw_request: Request):
|
||||
return await v1_chat_completions(tokenizer_manager, raw_request)
|
||||
|
||||
|
||||
@app.post("/v1/embeddings")
|
||||
@app.post("/v1/embeddings", response_class=ORJSONResponse)
|
||||
async def openai_v1_embeddings(raw_request: Request):
|
||||
response = await v1_embeddings(tokenizer_manager, raw_request)
|
||||
return response
|
||||
|
||||
|
||||
@app.get("/v1/models")
|
||||
@app.get("/v1/models", response_class=ORJSONResponse)
|
||||
def available_models():
|
||||
"""Show available models."""
|
||||
served_model_names = [tokenizer_manager.served_model_name]
|
||||
|
||||
Reference in New Issue
Block a user