Upgrade to vLLM 0.17.0 corex v4.1 overlay
@@ -38,6 +38,7 @@ from vllm.logprobs import Logprob
 from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs
 from vllm.sampling_params import (
     BeamSearchParams,
+    RepetitionDetectionParams,
     RequestOutputKind,
     SamplingParams,
     StructuredOutputsParams,
@@ -336,6 +337,16 @@ class ChatCompletionRequest(OpenAIBaseModel):
         ),
     )

+    repetition_detection: RepetitionDetectionParams | None = Field(
+        default=None,
+        description="Parameters for detecting repetitive N-gram patterns "
+        "in output tokens. If such repetition is detected, generation will "
+        "be ended early. LLMs can sometimes generate repetitive, unhelpful "
+        "token patterns, stopping only when they hit the maximum output length "
+        "(e.g. 'abcdabcdabcd...' or '\emoji \emoji \emoji ...'). This feature "
+        "can detect such behavior and terminate early, saving time and tokens.",
+    )
+
     # --8<-- [end:chat-completion-extra-params]

     def build_chat_params(
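The new `repetition_detection` request field is plumbed into `SamplingParams` below. A minimal sketch of exercising it through the OpenAI client against a running server; note that `RepetitionDetectionParams`' inner fields are not shown in this diff, so the names used here are assumptions, not the real schema:

```python
# Sketch only: "ngram_size" and "num_repeats" are assumed field names;
# the actual RepetitionDetectionParams schema is not visible in this diff.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "Repeat 'abcd' forever."}],
    extra_body={
        "repetition_detection": {
            "ngram_size": 8,   # assumed: length of the repeated window
            "num_repeats": 4,  # assumed: repetitions tolerated before early stop
        }
    },
)
```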
@@ -490,7 +501,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
             skip_special_tokens=self.skip_special_tokens,
             spaces_between_special_tokens=self.spaces_between_special_tokens,
             include_stop_str_in_output=self.include_stop_str_in_output,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             output_kind=RequestOutputKind.DELTA
             if self.stream
             else RequestOutputKind.FINAL_ONLY,
@@ -500,8 +510,37 @@ class ChatCompletionRequest(OpenAIBaseModel):
             allowed_token_ids=self.allowed_token_ids,
             extra_args=extra_args or None,
             skip_clone=True,  # Created fresh per request, safe to skip clone
+            repetition_detection=self.repetition_detection,
         )

+    @model_validator(mode="before")
+    @classmethod
+    def validate_response_format(cls, data):
+        response_format = data.get("response_format")
+        if response_format is None:
+            return data
+
+        rf_type = (
+            response_format.get("type")
+            if isinstance(response_format, dict)
+            else getattr(response_format, "type", None)
+        )
+
+        if rf_type == "json_schema":
+            json_schema = (
+                response_format.get("json_schema")
+                if isinstance(response_format, dict)
+                else getattr(response_format, "json_schema", None)
+            )
+            if json_schema is None:
+                raise VLLMValidationError(
+                    "When response_format type is 'json_schema', the "
+                    "'json_schema' field must be provided.",
+                    parameter="response_format",
+                )
+
+        return data
+
     @model_validator(mode="before")
     @classmethod
     def validate_stream_options(cls, data):
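For reference, a minimal sketch of payloads the new validator rejects and accepts (the field layout follows the OpenAI `response_format` convention used above):

```python
# Rejected: declares json_schema mode without supplying the schema itself.
bad = {"response_format": {"type": "json_schema"}}

# Accepted: the json_schema field accompanies the type.
good = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "answer", "schema": {"type": "object"}},
    }
}
```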
@@ -1249,13 +1249,23 @@ class OpenAIServingChat(OpenAIServing):
                         )

                         # get the expected call based on partial JSON
-                        # parsing which "autocompletes" the JSON
-                        expected_call = json.dumps(
-                            tool_parser.prev_tool_call_arr[index].get(
-                                "arguments", {}
-                            ),
-                            ensure_ascii=False,
-                        )
+                        # parsing which "autocompletes" the JSON.
+                        # Tool parsers (e.g. Qwen3Coder) store
+                        # arguments as a JSON string in
+                        # prev_tool_call_arr. Calling json.dumps()
+                        # on an already-serialized string would
+                        # double-serialize it (e.g. '{"k":1}' becomes
+                        # '"{\\"k\\":1}"'), which then causes the
+                        # replace() below to fail and append the
+                        # entire double-serialized string as a
+                        # spurious final delta.
+                        args = tool_parser.prev_tool_call_arr[index].get(
+                            "arguments", {}
+                        )
+                        if isinstance(args, str):
+                            expected_call = args
+                        else:
+                            expected_call = json.dumps(args, ensure_ascii=False)

                         # get what we've streamed so far for arguments
                         # for the current tool
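The pitfall the comment describes is easy to reproduce with the standard library alone:

```python
import json

# Some tool parsers store arguments already serialized as a JSON string.
args = '{"k": 1}'

# Naive re-serialization double-encodes the string:
print(json.dumps(args, ensure_ascii=False))  # prints "{\"k\": 1}"

# The guarded version from the diff leaves a string untouched:
expected_call = args if isinstance(args, str) else json.dumps(args, ensure_ascii=False)
print(expected_call)  # prints {"k": 1}
```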
@@ -143,7 +143,8 @@ class BaseFrontendArgs:
     templates and other tokenizer configuration."""
     enable_log_outputs: bool = False
     """If set to True, log model outputs (generations).
-    Requires --enable-log-requests."""
+    Requires `--enable-log-requests`. As with `--enable-log-requests`,
+    information is only logged at INFO level at maximum."""
     enable_log_deltas: bool = True
     """If set to False, output deltas will not be logged. Relevant only if
     --enable-log-outputs is set.
@@ -277,6 +278,10 @@ class FrontendArgs(BaseFrontendArgs):
     Enable offline FastAPI documentation for air-gapped environments.
     Uses vendored static assets bundled with vLLM.
     """
+    use_gpu_for_pooling_score: bool = False
+    """If set, run pooling score MaxSim on GPU in the API server process.
+    Can significantly improve late-interaction scoring performance.
+    https://github.com/vllm-project/vllm/pull/35330"""

     @classmethod
     def _customize_cli_kwargs(
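A hedged sketch of setting these options programmatically; the module path is an assumption (not confirmed by this diff), and on the CLI the same fields surface as flags with underscores replaced by dashes (e.g. `--use-gpu-for-pooling-score`):

```python
# Assumed import path for the frontend args dataclass shown above.
from vllm.entrypoints.openai.cli_args import FrontendArgs

args = FrontendArgs(
    enable_log_outputs=True,         # requires --enable-log-requests, per the docstring
    use_gpu_for_pooling_score=True,  # run MaxSim scoring on GPU in the API server
)
```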
@@ -26,6 +26,7 @@ from vllm.logprobs import Logprob
 from vllm.renderers import TokenizeParams
 from vllm.sampling_params import (
     BeamSearchParams,
+    RepetitionDetectionParams,
     RequestOutputKind,
     SamplingParams,
     StructuredOutputsParams,
@@ -166,6 +167,16 @@ class CompletionRequest(OpenAIBaseModel):
         ),
     )

+    repetition_detection: RepetitionDetectionParams | None = Field(
+        default=None,
+        description="Parameters for detecting repetitive N-gram patterns "
+        "in output tokens. If such repetition is detected, generation will "
+        "be ended early. LLMs can sometimes generate repetitive, unhelpful "
+        "token patterns, stopping only when they hit the maximum output length "
+        "(e.g. 'abcdabcdabcd...' or '\emoji \emoji \emoji ...'). This feature "
+        "can detect such behavior and terminate early, saving time and tokens.",
+    )
+
     # --8<-- [end:completion-extra-params]

     def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
@@ -259,7 +270,7 @@ class CompletionRequest(OpenAIBaseModel):
             structured_outputs_kwargs["json"] = json_schema.json_schema
         elif response_format.type == "structural_tag":
             structural_tag = response_format
-            assert structural_tag is not None and isinstance(
+            assert isinstance(
                 structural_tag,
                 (
                     LegacyStructuralTagResponseFormat,
@@ -302,7 +313,6 @@ class CompletionRequest(OpenAIBaseModel):
             skip_special_tokens=self.skip_special_tokens,
             spaces_between_special_tokens=self.spaces_between_special_tokens,
             include_stop_str_in_output=self.include_stop_str_in_output,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             output_kind=RequestOutputKind.DELTA
             if self.stream
             else RequestOutputKind.FINAL_ONLY,
@@ -311,8 +321,37 @@ class CompletionRequest(OpenAIBaseModel):
             allowed_token_ids=self.allowed_token_ids,
             extra_args=extra_args or None,
             skip_clone=True,  # Created fresh per request, safe to skip clone
+            repetition_detection=self.repetition_detection,
         )

+    @model_validator(mode="before")
+    @classmethod
+    def validate_response_format(cls, data):
+        response_format = data.get("response_format")
+        if response_format is None:
+            return data
+
+        rf_type = (
+            response_format.get("type")
+            if isinstance(response_format, dict)
+            else getattr(response_format, "type", None)
+        )
+
+        if rf_type == "json_schema":
+            json_schema = (
+                response_format.get("json_schema")
+                if isinstance(response_format, dict)
+                else getattr(response_format, "json_schema", None)
+            )
+            if json_schema is None:
+                raise VLLMValidationError(
+                    "When response_format type is 'json_schema', the "
+                    "'json_schema' field must be provided.",
+                    parameter="response_format",
+                )
+
+        return data
+
     @model_validator(mode="before")
     @classmethod
     def check_structured_outputs_count(cls, data):
@@ -62,11 +62,6 @@ from vllm.entrypoints.openai.speech_to_text.protocol import (
     TranscriptionResponse,
     TranslationRequest,
 )
-from vllm.entrypoints.pooling.classify.protocol import (
-    ClassificationChatRequest,
-    ClassificationCompletionRequest,
-    ClassificationResponse,
-)
 from vllm.entrypoints.pooling.embed.protocol import (
     EmbeddingBytesResponse,
     EmbeddingChatRequest,
@@ -161,7 +156,6 @@ CompletionLikeRequest: TypeAlias = (
     | TokenizeCompletionRequest
     | DetokenizeRequest
     | EmbeddingCompletionRequest
-    | ClassificationCompletionRequest
     | RerankRequest
     | ScoreRequest
     | PoolingCompletionRequest
@@ -171,7 +165,6 @@ ChatLikeRequest: TypeAlias = (
     ChatCompletionRequest
     | TokenizeChatRequest
     | EmbeddingChatRequest
-    | ClassificationChatRequest
     | PoolingChatRequest
 )

@@ -194,12 +187,10 @@ AnyResponse: TypeAlias = (
     | TranscriptionResponse
     | TokenizeResponse
     | PoolingResponse
-    | ClassificationResponse
     | ScoreResponse
     | GenerateResponse
 )


 RequestT = TypeVar("RequestT", bound=AnyRequest)

@@ -223,8 +214,8 @@ class ServeContext(Generic[RequestT]):

 class OpenAIServing:
     request_id_prefix: ClassVar[str] = """
-    A short string prepended to every request’s ID (e.g. "embd", "classify")
-    so you can easily tell “this ID came from Embedding vs Classification.”
+    A short string prepended to every request’s ID (e.g. "embd")
+    so you can easily tell “this ID came from Embedding.”
     """

     def __init__(
@@ -456,7 +447,7 @@ class OpenAIServing:
     ) -> ErrorResponse | None:
         """
         Default preprocessing hook. Subclasses may override
-        to prepare `ctx` (classification, embedding, etc.).
+        to prepare `ctx` (embedding, etc.).
         """
         return None

@@ -817,7 +808,7 @@ class OpenAIServing:
         token_num = len(input_ids)
         max_model_len = self.model_config.max_model_len

-        # Note: EmbeddingRequest, ClassificationRequest,
+        # Note: EmbeddingRequest,
         # and ScoreRequest doesn't have max_tokens
         if isinstance(
             request,
@@ -828,8 +819,6 @@ class OpenAIServing:
                 ScoreTextRequest,
                 ScoreQueriesDocumentsRequest,
                 RerankRequest,
-                ClassificationCompletionRequest,
-                ClassificationChatRequest,
             ),
         ):
             # Note: input length can be up to the entire model context length
@@ -839,8 +828,6 @@ class OpenAIServing:
             ScoreDataRequest: "score",
             ScoreTextRequest: "score",
             ScoreQueriesDocumentsRequest: "score",
-            ClassificationCompletionRequest: "classification",
-            ClassificationChatRequest: "classification",
         }
         operation = operations.get(type(request), "embedding generation")
         raise VLLMValidationError(
@@ -328,8 +328,9 @@ class ResponsesRequest(OpenAIBaseModel):
         # Also check text.format for OpenAI-style json_schema
         if self.text is not None and self.text.format is not None:
             if structured_outputs is not None:
-                raise ValueError(
-                    "Cannot specify both structured_outputs and text.format"
+                raise VLLMValidationError(
+                    "Cannot specify both structured_outputs and text.format",
+                    parameter="structured_outputs",
                 )
             response_format = self.text.format
             if (
@@ -378,14 +379,19 @@ class ResponsesRequest(OpenAIBaseModel):
             )

     @model_validator(mode="before")
     @classmethod
     def validate_background(cls, data):
         if not data.get("background"):
             return data
         if not data.get("store", True):
-            raise ValueError("background can only be used when `store` is true")
+            raise VLLMValidationError(
+                "background can only be used when `store` is true",
+                parameter="background",
+            )
         return data

     @model_validator(mode="before")
     @classmethod
     def validate_prompt(cls, data):
         if data.get("prompt") is not None:
             raise VLLMValidationError(
@@ -394,16 +400,19 @@ class ResponsesRequest(OpenAIBaseModel):
         return data

     @model_validator(mode="before")
     @classmethod
     def check_cache_salt_support(cls, data):
         if data.get("cache_salt") is not None and (
             not isinstance(data["cache_salt"], str) or not data["cache_salt"]
         ):
-            raise ValueError(
-                "Parameter 'cache_salt' must be a non-empty string if provided."
+            raise VLLMValidationError(
+                "Parameter 'cache_salt' must be a non-empty string if provided.",
+                parameter="cache_salt",
             )
         return data

     @model_validator(mode="before")
     @classmethod
     def function_call_parsing(cls, data):
         """Parse function_call dictionaries into ResponseFunctionToolCall objects.

         This ensures Pydantic can properly resolve union types in the input field.
@@ -85,6 +85,8 @@ from vllm.entrypoints.openai.responses.protocol import (
     ResponseCreatedEvent,
     ResponseInProgressEvent,
     ResponseInputOutputMessage,
+    ResponseReasoningPartAddedEvent,
+    ResponseReasoningPartDoneEvent,
     ResponsesRequest,
     ResponsesResponse,
     ResponseUsage,
@@ -1339,6 +1341,19 @@ class OpenAIServingResponses(OpenAIServing):
                         ),
                     )
                 )
+                yield _increment_sequence_number_and_return(
+                    ResponseReasoningPartAddedEvent(
+                        type="response.reasoning_part.added",
+                        sequence_number=-1,
+                        output_index=current_output_index,
+                        item_id=current_item_id,
+                        content_index=current_content_index,
+                        part=ResponseReasoningTextContent(
+                            text="",
+                            type="reasoning_text",
+                        ),
+                    )
+                )
             else:
                 yield _increment_sequence_number_and_return(
                     ResponseOutputItemAddedEvent(
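With the new event types wired in, a reasoning-bearing stream gets an explicit part lifecycle. A sketch of the expected ordering, inferred from this diff, for an answer with a reasoning segment followed by regular text (delta events elided):

```python
# Sketch of the streamed event sequence, inferred from this change:
#   response.output_item.added      # reasoning item opens
#   response.reasoning_part.added   # new in this change
#   ...                             # reasoning text deltas
#   response.reasoning_part.done    # new in this change
#   response.output_item.added      # message item opens
#   response.content_part.added
#   ...                             # output_text deltas
```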
@@ -1354,22 +1369,21 @@ class OpenAIServingResponses(OpenAIServing):
                         ),
                     )
                 )
-            yield _increment_sequence_number_and_return(
-                ResponseContentPartAddedEvent(
-                    type="response.content_part.added",
-                    sequence_number=-1,
-                    output_index=current_output_index,
-                    item_id=current_item_id,
-                    content_index=current_content_index,
-                    part=ResponseOutputText(
-                        type="output_text",
-                        text="",
-                        annotations=[],
-                        logprobs=[],
-                    ),
-                )
-            )
+                yield _increment_sequence_number_and_return(
+                    ResponseContentPartAddedEvent(
+                        type="response.content_part.added",
+                        sequence_number=-1,
+                        output_index=current_output_index,
+                        item_id=current_item_id,
+                        content_index=current_content_index,
+                        part=ResponseOutputText(
+                            type="output_text",
+                            text="",
+                            annotations=[],
+                            logprobs=[],
+                        ),
+                    )
+                )
             current_content_index += 1
             first_delta_sent = True
             # todo(kebe7jun) tool call support
@@ -1397,6 +1411,19 @@ class OpenAIServingResponses(OpenAIServing):
                             text=reason_content,
                         )
                     )
+                    yield _increment_sequence_number_and_return(
+                        ResponseReasoningPartDoneEvent(
+                            type="response.reasoning_part.done",
+                            sequence_number=-1,
+                            item_id=current_item_id,
+                            output_index=current_output_index,
+                            content_index=current_content_index,
+                            part=ResponseReasoningTextContent(
+                                text=reason_content,
+                                type="reasoning_text",
+                            ),
+                        )
+                    )
                     current_content_index = 0
                     reasoning_item = ResponseReasoningItem(
                         type="reasoning",
@@ -1418,6 +1445,8 @@ class OpenAIServingResponses(OpenAIServing):
                             item=reasoning_item,
                         )
                     )
+                    current_output_index += 1
+                    current_item_id = str(uuid.uuid4())
                     yield _increment_sequence_number_and_return(
                         ResponseOutputItemAddedEvent(
                             type="response.output_item.added",
@@ -1432,8 +1461,6 @@ class OpenAIServingResponses(OpenAIServing):
                         ),
                     )
                 )
-                current_output_index += 1
-                current_item_id = str(uuid.uuid4())
                 yield _increment_sequence_number_and_return(
                     ResponseContentPartAddedEvent(
                         type="response.content_part.added",
@@ -1449,7 +1476,6 @@ class OpenAIServingResponses(OpenAIServing):
                         ),
                     )
                 )
-                current_content_index += 1
                 # reset previous delta messages
                 previous_delta_messages = []

@@ -1485,7 +1511,6 @@ class OpenAIServingResponses(OpenAIServing):
                         ),
                     )
                 )
-                current_content_index += 1

                 previous_delta_messages.append(delta_message)
             if previous_delta_messages:
@@ -1505,7 +1530,19 @@ class OpenAIServingResponses(OpenAIServing):
                             text=reason_content,
                         )
                     )
-                current_content_index += 1
+                yield _increment_sequence_number_and_return(
+                    ResponseReasoningPartDoneEvent(
+                        type="response.reasoning_part.done",
+                        sequence_number=-1,
+                        item_id=current_item_id,
+                        output_index=current_output_index,
+                        content_index=current_content_index,
+                        part=ResponseReasoningTextContent(
+                            text=reason_content,
+                            type="reasoning_text",
+                        ),
+                    )
+                )
                 reasoning_item = ResponseReasoningItem(
                     type="reasoning",
                     content=[
@@ -1543,7 +1580,6 @@ class OpenAIServingResponses(OpenAIServing):
                         item_id=current_item_id,
                     )
                 )
-                current_content_index += 1
                 part = ResponseOutputText(
                     text=final_content,
                     type="output_text",
@@ -1559,7 +1595,6 @@ class OpenAIServingResponses(OpenAIServing):
                         part=part,
                     )
                 )
-                current_content_index += 1
                 item = ResponseOutputMessage(
                     type="message",
                     role="assistant",
@@ -11,6 +11,7 @@ from typing import Final, Literal, TypeAlias, TypeVar, cast

 import numpy as np
 from fastapi import Request
+from soundfile import LibsndfileError
 from transformers import PreTrainedTokenizerBase

 import vllm.envs as envs
@@ -57,6 +58,14 @@ try:
 except ImportError:
     librosa = PlaceholderModule("librosa")  # type: ignore[assignment]

+# Public libsndfile error codes exposed via `soundfile.LibsndfileError.code`, soundfile
+# being librosa's main backend. Used to validate if an audio loading error is due to a
+# server error vs a client error (invalid audio file).
+# 1 = unrecognised format (file is not a supported audio container)
+# 3 = malformed file (corrupt or structurally invalid audio)
+# 4 = unsupported encoding (codec not supported by this libsndfile build)
+_BAD_SF_CODES = {1, 3, 4}
+
 SpeechToTextResponse: TypeAlias = TranscriptionResponse | TranslationResponse
 SpeechToTextResponseVerbose: TypeAlias = (
     TranscriptionResponseVerbose | TranslationResponseVerbose
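A self-contained sketch of the same client-vs-server triage outside the serving class; `soundfile` is the backend whose public error codes `_BAD_SF_CODES` enumerates:

```python
import soundfile as sf

try:
    data, sample_rate = sf.read("clip.ogg")
except sf.LibsndfileError as exc:
    if exc.code in {1, 3, 4}:  # unrecognised / malformed / unsupported encoding
        print("client error: invalid or unsupported audio file")
    else:
        raise  # anything else is treated as a server-side failure
```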
@@ -315,9 +324,15 @@ class OpenAISpeechToText(OpenAIServing):
             )

         with io.BytesIO(audio_data) as bytes_:
-            # NOTE resample to model SR here for efficiency. This is also a
-            # pre-requisite for chunking, as it assumes Whisper SR.
-            y, sr = librosa.load(bytes_, sr=self.asr_config.sample_rate)
+            try:
+                # NOTE resample to model SR here for efficiency. This is also a
+                # pre-requisite for chunking, as it assumes Whisper SR.
+                y, sr = librosa.load(bytes_, sr=self.asr_config.sample_rate)
+            except LibsndfileError as exc:
+                # Distinguish client errors (invalid audio) from server errors
+                if exc.code in _BAD_SF_CODES:
+                    raise ValueError("Invalid or unsupported audio file.") from exc
+                raise

         duration = librosa.get_duration(y=y, sr=sr)
         do_split_audio = (
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "The 'vllm.entrypoints.openai.translations' module has been renamed to "
-    "'vllm.entrypoints.openai.speech_to_text'. Please update your imports. "
-    "This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.api_router' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.api_router'. Please update your "
-    "imports. This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.api_router import *  # noqa: F401,F403,E402
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.protocol' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.protocol'. Please update your "
-    "imports. This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.protocol import *  # noqa: F401,F403,E402
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.serving' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.serving'. Please update your "
-    "imports. This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.serving import *  # noqa: F401,F403,E402
@@ -1,15 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.speech_to_text' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.speech_to_text'. Please update "
-    "your imports. This backward-compatible alias will be removed in version "
-    "0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.speech_to_text import *  # noqa: F401,F403,E402
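With the compatibility shims deleted, imports must target the new package directly. Using `TranslationRequest` (imported elsewhere in this commit) as the example:

```python
# Before (shim removed by this commit; previously raised a DeprecationWarning):
# from vllm.entrypoints.openai.translations.protocol import TranslationRequest

# After:
from vllm.entrypoints.openai.speech_to_text.protocol import TranslationRequest
```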