# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import AsyncGenerator

from fastapi import Request

from vllm.engine.protocol import EngineClient
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import (
    ErrorResponse,
    RequestResponseMetadata,
    TranscriptionRequest,
    TranscriptionResponse,
    TranscriptionResponseStreamChoice,
    TranscriptionResponseVerbose,
    TranscriptionStreamResponse,
    TranslationRequest,
    TranslationResponse,
    TranslationResponseStreamChoice,
    TranslationResponseVerbose,
    TranslationStreamResponse,
)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.openai.speech_to_text import OpenAISpeechToText
from vllm.logger import init_logger
from vllm.outputs import RequestOutput

logger = init_logger(__name__)


class OpenAIServingTranscription(OpenAISpeechToText):
    """Serving handler for audio transcription requests.

    Configures the ``OpenAISpeechToText`` base class with
    ``task_type="transcribe"`` and supplies the transcription-specific
    response and stream-chunk classes to the inherited helpers.
    """

    def __init__(
        self,
        engine_client: EngineClient,
        models: OpenAIServingModels,
        *,
        request_logger: RequestLogger | None,
        return_tokens_as_token_ids: bool = False,
        log_error_stack: bool = False,
        enable_force_include_usage: bool = False,
    ):
        """Forward all options to the base class, fixing the task type.

        Every argument is passed through unchanged; only ``task_type`` is
        pinned to ``"transcribe"``.
        """
        super().__init__(
            engine_client=engine_client,
            models=models,
            request_logger=request_logger,
            return_tokens_as_token_ids=return_tokens_as_token_ids,
            task_type="transcribe",
            log_error_stack=log_error_stack,
            enable_force_include_usage=enable_force_include_usage,
        )

    async def create_transcription(
        self, audio_data: bytes, request: TranscriptionRequest, raw_request: Request
    ) -> (
        TranscriptionResponse
        | TranscriptionResponseVerbose
        | AsyncGenerator[str, None]
        | ErrorResponse
    ):
        """Handle a transcription request in the style of OpenAI's API.

        The API specification being mimicked is documented at
        https://platform.openai.com/docs/api-reference/audio/createTranscription

        Args:
            audio_data: Raw bytes of the audio to process.
            request: The parsed transcription request.
            raw_request: The underlying FastAPI request object.

        Returns:
            Whatever the inherited ``_create_speech_to_text`` helper returns
            for this request (see the return annotation for the possible
            types).
        """
        # The verbose response model is used only for "verbose_json".
        if request.response_format == "verbose_json":
            response_cls = TranscriptionResponseVerbose
        else:
            response_cls = TranscriptionResponse

        return await self._create_speech_to_text(
            audio_data=audio_data,
            request=request,
            raw_request=raw_request,
            response_class=response_cls,
            stream_generator_method=self.transcription_stream_generator,
        )

    async def transcription_stream_generator(
        self,
        request: TranscriptionRequest,
        result_generator: list[AsyncGenerator[RequestOutput, None]],
        request_id: str,
        request_metadata: RequestResponseMetadata,
        audio_duration_s: float,
    ) -> AsyncGenerator[str, None]:
        """Yield streamed transcription chunks for a request.

        Wraps the inherited ``_speech_to_text_stream_generator`` with the
        transcription chunk object type and stream response classes, and
        re-yields every chunk it produces.
        """
        chunk_stream = self._speech_to_text_stream_generator(
            request=request,
            list_result_generator=result_generator,
            request_id=request_id,
            request_metadata=request_metadata,
            audio_duration_s=audio_duration_s,
            chunk_object_type="transcription.chunk",
            response_stream_choice_class=TranscriptionResponseStreamChoice,
            stream_response_class=TranscriptionStreamResponse,
        )
        async for piece in chunk_stream:
            yield piece


class OpenAIServingTranslation(OpenAISpeechToText):
    """Serving handler for audio translation requests.

    Configures the ``OpenAISpeechToText`` base class with
    ``task_type="translate"`` and supplies the translation-specific
    response and stream-chunk classes to the inherited helpers.
    """

    def __init__(
        self,
        engine_client: EngineClient,
        models: OpenAIServingModels,
        *,
        request_logger: RequestLogger | None,
        return_tokens_as_token_ids: bool = False,
        log_error_stack: bool = False,
        enable_force_include_usage: bool = False,
    ):
        """Forward all options to the base class, fixing the task type.

        Every argument is passed through unchanged; only ``task_type`` is
        pinned to ``"translate"``.
        """
        super().__init__(
            engine_client=engine_client,
            models=models,
            request_logger=request_logger,
            return_tokens_as_token_ids=return_tokens_as_token_ids,
            task_type="translate",
            log_error_stack=log_error_stack,
            enable_force_include_usage=enable_force_include_usage,
        )

    async def create_translation(
        self, audio_data: bytes, request: TranslationRequest, raw_request: Request
    ) -> (
        TranslationResponse
        | TranslationResponseVerbose
        | AsyncGenerator[str, None]
        | ErrorResponse
    ):
        """Handle a translation request in the style of OpenAI's API.

        The API specification being mimicked is documented at
        https://platform.openai.com/docs/api-reference/audio/createTranslation

        Args:
            audio_data: Raw bytes of the audio to process.
            request: The parsed translation request.
            raw_request: The underlying FastAPI request object.

        Returns:
            Whatever the inherited ``_create_speech_to_text`` helper returns
            for this request (see the return annotation for the possible
            types).
        """
        # The verbose response model is used only for "verbose_json".
        if request.response_format == "verbose_json":
            response_cls = TranslationResponseVerbose
        else:
            response_cls = TranslationResponse

        return await self._create_speech_to_text(
            audio_data=audio_data,
            request=request,
            raw_request=raw_request,
            response_class=response_cls,
            stream_generator_method=self.translation_stream_generator,
        )

    async def translation_stream_generator(
        self,
        request: TranslationRequest,
        result_generator: list[AsyncGenerator[RequestOutput, None]],
        request_id: str,
        request_metadata: RequestResponseMetadata,
        audio_duration_s: float,
    ) -> AsyncGenerator[str, None]:
        """Yield streamed translation chunks for a request.

        Wraps the inherited ``_speech_to_text_stream_generator`` with the
        translation chunk object type and stream response classes, and
        re-yields every chunk it produces.
        """
        chunk_stream = self._speech_to_text_stream_generator(
            request=request,
            list_result_generator=result_generator,
            request_id=request_id,
            request_metadata=request_metadata,
            audio_duration_s=audio_duration_s,
            chunk_object_type="translation.chunk",
            response_stream_choice_class=TranslationResponseStreamChoice,
            stream_response_class=TranslationStreamResponse,
        )
        async for piece in chunk_stream:
            yield piece