Add endpoint for file support, purely to speed up processing of input_embeds. (#2797)
This commit is contained in:
@@ -19,6 +19,7 @@ This file implements HTTP APIs for the inference engine via fastapi.
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import logging
|
||||
import multiprocessing as multiprocessing
|
||||
import os
|
||||
@@ -259,6 +260,29 @@ async def generate_request(obj: GenerateReqInput, request: Request):
|
||||
return _create_error_response(e)
|
||||
|
||||
|
||||
@app.api_route("/generate_from_file", methods=["POST"])
async def generate_from_file_request(file: UploadFile, request: Request):
    """Handle a generate request whose ``input_embeds`` arrive as an uploaded file.

    The uploaded file is read in full, decoded as UTF-8, and parsed as JSON.
    The parsed value is passed as ``input_embeds`` to ``GenerateReqInput``
    together with a fixed set of sampling parameters.

    Args:
        file: Uploaded file containing the JSON-encoded input embeddings.
        request: The incoming HTTP request, forwarded to the generation call.

    Returns:
        The first item produced by ``_global_state.generate_request``, or the
        value of ``_create_error_response`` when a ``ValueError`` is raised
        (including an upload that is not valid UTF-8 or not valid JSON).
    """
    try:
        content = await file.read()
        # UnicodeDecodeError and json.JSONDecodeError are both ValueError
        # subclasses; parsing inside the try lets a malformed upload produce
        # the same error response as a failed generation instead of escaping
        # as an unhandled exception.
        input_embeds = json.loads(content.decode("utf-8"))

        obj = GenerateReqInput(
            input_embeds=input_embeds,
            sampling_params={
                "repetition_penalty": 1.2,
                "temperature": 0.2,
                "max_new_tokens": 512,
            },
        )

        # Only the first yielded item is awaited and returned.
        ret = await _global_state.generate_request(obj, request).__anext__()
        return ret
    except ValueError as e:
        logger.error(f"Error: {e}")
        return _create_error_response(e)
|
||||
|
||||
|
||||
@app.api_route("/encode", methods=["POST", "PUT"])
|
||||
async def encode_request(obj: EmbeddingReqInput, request: Request):
|
||||
"""Handle an embedding request."""
|
||||
|
||||
Reference in New Issue
Block a user