[Minor] clean up multimodal processor and tokenizer manager (#7624)
@@ -353,8 +353,7 @@ async def generate_from_file_request(file: UploadFile, request: Request):
     obj = GenerateReqInput(
         input_embeds=input_embeds,
         sampling_params={
-            "repetition_penalty": 1.2,
-            "temperature": 0.2,
+            "temperature": 0.0,
             "max_new_tokens": 512,
         },
     )
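For context, temperature 0.0 selects greedy decoding, and dropping repetition_penalty leaves it at the sampler default. A minimal client-side sketch of an equivalent request against the native /generate endpoint (host, port, and the text field are illustrative assumptions; this handler actually passes input_embeds parsed from the uploaded file):

import requests

# Hypothetical call mirroring the new defaults: greedy decoding
# (temperature 0.0), no explicit repetition penalty, 512 new tokens.
response = requests.post(
    "http://localhost:30000/generate",  # assumed local SGLang server address
    json={
        "text": "Describe the content of the uploaded file.",
        "sampling_params": {
            "temperature": 0.0,
            "max_new_tokens": 512,
        },
    },
)
print(response.json())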
@@ -393,16 +392,6 @@ async def classify_request(obj: EmbeddingReqInput, request: Request):
         return _create_error_response(e)


-@app.api_route(
-    "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
-)
-async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
-    """Endpoint for reranking documents based on query relevance."""
-    return await raw_request.app.state.openai_serving_rerank.handle_request(
-        request, raw_request
-    )
-
-
 @app.api_route("/flush_cache", methods=["GET", "POST"])
 async def flush_cache():
     """Flush the radix cache."""
@@ -841,6 +830,16 @@ async def v1_score_request(request: ScoringRequest, raw_request: Request):
     )


+@app.api_route(
+    "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
+)
+async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
+    """Endpoint for reranking documents based on query relevance."""
+    return await raw_request.app.state.openai_serving_rerank.handle_request(
+        request, raw_request
+    )
+
+
 def _create_error_response(e):
     return ORJSONResponse(
         {"error": {"message": str(e)}}, status_code=HTTPStatus.BAD_REQUEST
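The relocated /v1/rerank handler is otherwise unchanged. A minimal usage sketch, assuming V1RerankReqInput follows the common rerank schema with query and documents fields (the exact field names are not shown in this diff, so treat them as assumptions):

import requests

# Hypothetical rerank request; adjust the host/port and the payload keys
# to the actual V1RerankReqInput schema if they differ.
payload = {
    "query": "What is the capital of France?",
    "documents": [
        "Paris is the capital and largest city of France.",
        "Berlin is the capital of Germany.",
    ],
}
response = requests.post("http://localhost:30000/v1/rerank", json=payload)
print(response.json())  # expected: the documents with relevance scores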