Add endpoint for file support, purely to speed up processing of input_embeds. (#2797)

This commit is contained in:
Rin Intachuen
2025-03-17 08:30:37 +07:00
committed by GitHub
parent 48efec7b05
commit d1112d8548
2 changed files with 63 additions and 4 deletions

View File

@@ -19,6 +19,7 @@ This file implements HTTP APIs for the inference engine via fastapi.
import asyncio
import dataclasses
import json
import logging
import multiprocessing as multiprocessing
import os
@@ -259,6 +260,29 @@ async def generate_request(obj: GenerateReqInput, request: Request):
return _create_error_response(e)
@app.api_route("/generate_from_file", methods=["POST"])
async def generate_from_file_request(file: UploadFile, request: Request):
    """Handle a generate request whose ``input_embeds`` arrive as an uploaded JSON file.

    This endpoint exists purely to speed up transfer of large ``input_embeds``:
    the client uploads them as a file instead of inlining them in a JSON request
    body.  The uploaded file must be UTF-8 text containing JSON-encoded embeds.

    Returns the first result yielded by the engine's generate stream, or a
    structured error response on failure.
    """
    try:
        # Parse inside the try-block: UnicodeDecodeError and
        # json.JSONDecodeError are both subclasses of ValueError, so a
        # malformed or non-UTF-8 upload yields a structured error response
        # instead of an unhandled 500.
        content = await file.read()
        input_embeds = json.loads(content.decode("utf-8"))
        obj = GenerateReqInput(
            input_embeds=input_embeds,
            # NOTE(review): sampling params are hard-coded for this endpoint;
            # confirm callers never need to override them.
            sampling_params={
                "repetition_penalty": 1.2,
                "temperature": 0.2,
                "max_new_tokens": 512,
            },
        )
        ret = await _global_state.generate_request(obj, request).__anext__()
        return ret
    except ValueError as e:
        logger.error(f"Error: {e}")
        return _create_error_response(e)
@app.api_route("/encode", methods=["POST", "PUT"])
async def encode_request(obj: EmbeddingReqInput, request: Request):
"""Handle an embedding request."""