Sync from v0.13
This commit is contained in:
64
examples/pooling/embed/embedding_requests_base64_client.py
Normal file
64
examples/pooling/embed/embedding_requests_base64_client.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Example Python client for embedding API using vLLM API server
|
||||
NOTE:
|
||||
start a supported embeddings model server with `vllm serve`, e.g.
|
||||
vllm serve intfloat/e5-small
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
|
||||
import requests
|
||||
import torch
|
||||
|
||||
from vllm.utils.serial_utils import (
|
||||
EMBED_DTYPE_TO_TORCH_DTYPE,
|
||||
ENDIANNESS,
|
||||
binary2tensor,
|
||||
)
|
||||
|
||||
|
||||
def post_http_request(prompt: dict, api_url: str) -> requests.Response:
|
||||
headers = {"User-Agent": "Test Client"}
|
||||
response = requests.post(api_url, headers=headers, json=prompt)
|
||||
return response
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host", type=str, default="localhost")
|
||||
parser.add_argument("--port", type=int, default=8000)
|
||||
parser.add_argument("--model", type=str, default="intfloat/e5-small")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main(args):
|
||||
api_url = f"http://{args.host}:{args.port}/v1/embeddings"
|
||||
model_name = args.model
|
||||
|
||||
# The OpenAI client does not support the embed_dtype and endianness parameters.
|
||||
for embed_dtype in EMBED_DTYPE_TO_TORCH_DTYPE:
|
||||
for endianness in ENDIANNESS:
|
||||
prompt = {
|
||||
"model": model_name,
|
||||
"input": "vLLM is great!",
|
||||
"encoding_format": "base64",
|
||||
"embed_dtype": embed_dtype,
|
||||
"endianness": endianness,
|
||||
}
|
||||
response = post_http_request(prompt=prompt, api_url=api_url)
|
||||
|
||||
embedding = []
|
||||
for data in response.json()["data"]:
|
||||
binary = base64.b64decode(data["embedding"])
|
||||
tensor = binary2tensor(binary, (-1,), embed_dtype, endianness)
|
||||
embedding.append(tensor.to(torch.float32))
|
||||
embedding = torch.cat(embedding)
|
||||
print(embed_dtype, endianness, embedding.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
main(args)
|
||||
Reference in New Issue
Block a user