[Minor] clean up multimodal processor and tokenizer manager (#7624)

This commit is contained in:
Lianmin Zheng
2025-06-29 02:50:14 -07:00
committed by GitHub
parent 7c0db3a6c5
commit 071a1f51ae
9 changed files with 147 additions and 165 deletions

View File

@@ -353,8 +353,7 @@ async def generate_from_file_request(file: UploadFile, request: Request):
obj = GenerateReqInput(
input_embeds=input_embeds,
sampling_params={
"repetition_penalty": 1.2,
"temperature": 0.2,
"temperature": 0.0,
"max_new_tokens": 512,
},
)
@@ -393,16 +392,6 @@ async def classify_request(obj: EmbeddingReqInput, request: Request):
return _create_error_response(e)
@app.api_route(
    "/v1/rerank",
    methods=["POST", "PUT"],
    dependencies=[Depends(validate_json_request)],
)
async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
    """Endpoint for reranking documents based on query relevance."""
    # The rerank handler is read from the application state on every call;
    # this route only forwards `request` and `raw_request` to it and returns
    # whatever the handler produces.
    rerank_handler = raw_request.app.state.openai_serving_rerank
    return await rerank_handler.handle_request(request, raw_request)
@app.api_route("/flush_cache", methods=["GET", "POST"])
async def flush_cache():
"""Flush the radix cache."""
@@ -841,6 +830,16 @@ async def v1_score_request(request: ScoringRequest, raw_request: Request):
)
@app.api_route(
    "/v1/rerank",
    methods=["POST", "PUT"],
    dependencies=[Depends(validate_json_request)],
)
async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
    """Endpoint for reranking documents based on query relevance."""
    # The rerank handler is read from the application state on every call;
    # this route only forwards `request` and `raw_request` to it and returns
    # whatever the handler produces.
    rerank_handler = raw_request.app.state.openai_serving_rerank
    return await rerank_handler.handle_request(request, raw_request)
def _create_error_response(e):
return ORJSONResponse(
{"error": {"message": str(e)}}, status_code=HTTPStatus.BAD_REQUEST