Add openai embedding API (#997)

This commit is contained in:
Ying Sheng
2024-08-09 11:19:18 -07:00
committed by GitHub
parent 05c50a82b8
commit b16e856f11
8 changed files with 135 additions and 19 deletions

View File

@@ -60,6 +60,7 @@ from sglang.srt.openai_api.adapter import (
v1_chat_completions,
v1_completions,
v1_delete_file,
v1_embeddings,
v1_files_create,
v1_retrieve_batch,
v1_retrieve_file,
@@ -176,6 +177,12 @@ async def openai_v1_chat_completions(raw_request: Request):
return await v1_chat_completions(tokenizer_manager, raw_request)
@app.post("/v1/embeddings")
async def openai_v1_embeddings(raw_request: Request):
    """Handle POST /v1/embeddings.

    Passes the incoming request, together with the module-level
    ``tokenizer_manager``, to ``v1_embeddings`` and returns whatever that
    coroutine produces, unchanged.
    """
    # All parsing and response building happens in the adapter; this
    # function only binds the route to it.
    return await v1_embeddings(tokenizer_manager, raw_request)
@app.get("/v1/models")
def available_models():
"""Show available models."""
@@ -412,7 +419,7 @@ def _wait_and_warmup(server_args, pipe_finish_writer):
# Send a warmup request
request_name = "/generate" if model_info["is_generation"] else "/encode"
max_new_tokens = 8 if model_info["is_generation"] else 0
max_new_tokens = 8 if model_info["is_generation"] else 1
try:
for _ in range(server_args.dp_size):
res = requests.post(