Sync from v0.13
This commit is contained in:
75
examples/online_serving/openai_translation_client.py
Normal file
75
examples/online_serving/openai_translation_client.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import httpx
|
||||
from openai import OpenAI
|
||||
|
||||
from vllm.assets.audio import AudioAsset
|
||||
|
||||
|
||||
def sync_openai(audio_path: str, client: OpenAI):
|
||||
with open(audio_path, "rb") as f:
|
||||
translation = client.audio.translations.create(
|
||||
file=f,
|
||||
model="openai/whisper-large-v3",
|
||||
response_format="json",
|
||||
temperature=0.0,
|
||||
# Additional params not provided by OpenAI API.
|
||||
extra_body=dict(
|
||||
language="it",
|
||||
seed=4419,
|
||||
repetition_penalty=1.3,
|
||||
),
|
||||
)
|
||||
print("translation result:", translation.text)
|
||||
|
||||
|
||||
async def stream_openai_response(audio_path: str, base_url: str, api_key: str):
|
||||
data = {
|
||||
"language": "it",
|
||||
"stream": True,
|
||||
"model": "openai/whisper-large-v3",
|
||||
}
|
||||
url = base_url + "/audio/translations"
|
||||
headers = {"Authorization": f"Bearer {api_key}"}
|
||||
print("translation result:", end=" ")
|
||||
# OpenAI translation API client does not support streaming.
|
||||
async with httpx.AsyncClient() as client:
|
||||
with open(audio_path, "rb") as f:
|
||||
async with client.stream(
|
||||
"POST", url, files={"file": f}, data=data, headers=headers
|
||||
) as response:
|
||||
async for line in response.aiter_lines():
|
||||
# Each line is a JSON object prefixed with 'data: '
|
||||
if line:
|
||||
if line.startswith("data: "):
|
||||
line = line[len("data: ") :]
|
||||
# Last chunk, stream ends
|
||||
if line.strip() == "[DONE]":
|
||||
break
|
||||
# Parse the JSON response
|
||||
chunk = json.loads(line)
|
||||
# Extract and print the content
|
||||
content = chunk["choices"][0].get("delta", {}).get("content")
|
||||
print(content, end="")
|
||||
|
||||
|
||||
def main():
|
||||
foscolo = str(AudioAsset("azacinto_foscolo").get_local_path())
|
||||
|
||||
# Modify OpenAI's API key and API base to use vLLM's API server.
|
||||
openai_api_key = "EMPTY"
|
||||
openai_api_base = "http://localhost:8000/v1"
|
||||
client = OpenAI(
|
||||
api_key=openai_api_key,
|
||||
base_url=openai_api_base,
|
||||
)
|
||||
sync_openai(foscolo, client)
|
||||
# Run the asynchronous function
|
||||
asyncio.run(stream_openai_response(foscolo, openai_api_base, openai_api_key))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user