# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import warnings
from typing import Any

from typing_extensions import deprecated

from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike

logger = init_logger(__name__)
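

# Backwards-compatibility shim: the public tokenizer helpers now live in
# `vllm.tokenizers`. The module-level `__getattr__` below (PEP 562) lazily
# resolves the old names and emits a `DeprecationWarning` on each access.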
def __getattr__(name: str):
    if name == "AnyTokenizer":
        warnings.warn(
            "`vllm.transformers_utils.tokenizer.AnyTokenizer` has been moved to "
            "`vllm.tokenizers.TokenizerLike`. "
            "The old name will be removed in v0.14.",
            DeprecationWarning,
            stacklevel=2,
        )
        return TokenizerLike

    if name == "get_tokenizer":
        from vllm.tokenizers import get_tokenizer

        warnings.warn(
            "`vllm.transformers_utils.tokenizer.get_tokenizer` "
            "has been moved to `vllm.tokenizers.get_tokenizer`. "
            "The old name will be removed in v0.14.",
            DeprecationWarning,
            stacklevel=2,
        )
        return get_tokenizer

    if name == "cached_get_tokenizer":
        from vllm.tokenizers import cached_get_tokenizer

        warnings.warn(
            "`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
            "has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
            "The old name will be removed in v0.14.",
            DeprecationWarning,
            stacklevel=2,
        )
        return cached_get_tokenizer

    if name == "cached_tokenizer_from_config":
        from vllm.tokenizers import cached_tokenizer_from_config

        warnings.warn(
            "`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
            "has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
            "The old name will be removed in v0.14.",
            DeprecationWarning,
            stacklevel=2,
        )
        return cached_tokenizer_from_config

    if name == "init_tokenizer_from_configs":
        from vllm.tokenizers import cached_tokenizer_from_config

        warnings.warn(
            "`vllm.transformers_utils.tokenizer.init_tokenizer_from_configs` "
            "has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
            "The old name will be removed in v0.14.",
            DeprecationWarning,
            stacklevel=2,
        )
        return cached_tokenizer_from_config

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
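
# Illustrative sketch (comment-only, not executed here): accessing one of the
# old names routes through the module-level `__getattr__` above (PEP 562) and
# emits a `DeprecationWarning`:
#
#     import warnings
#
#     with warnings.catch_warnings(record=True) as caught:
#         warnings.simplefilter("always")
#         from vllm.transformers_utils.tokenizer import get_tokenizer
#     assert issubclass(caught[-1].category, DeprecationWarning)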
@deprecated("Will be removed in v0.14. Please use `tokenizer.decode()` instead.")
def decode_tokens(
tokenizer: TokenizerLike,
token_ids: list[int],
*,
skip_special_tokens: bool | None = None,
) -> str:
"""
Backend-agnostic equivalent of HF's
`tokenizer.decode(token_ids, ...)`.
`skip_special_tokens=None` means to use the backend's default
settings.
"""
kw_args: dict[str, Any] = {}
if skip_special_tokens is not None:
kw_args["skip_special_tokens"] = skip_special_tokens
return tokenizer.decode(token_ids, **kw_args)
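
# Usage sketch (`tok` is a placeholder for any `TokenizerLike` instance):
#
#     text = decode_tokens(tok, [1, 2, 3], skip_special_tokens=True)
#
# Only non-None options are forwarded, so backends that do not accept the
# keyword still work with the default call.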
@deprecated("Will be removed in v0.14. Please use `tokenizer.encode()` instead.")
def encode_tokens(
tokenizer: TokenizerLike,
text: str,
*,
truncation: bool | None = None,
max_length: int | None = None,
add_special_tokens: bool | None = None,
) -> list[int]:
"""
Backend-agnostic equivalent of HF's
`tokenizer.encode(text, ...)`.
`add_special_tokens=None` means to use the backend's default
settings.
"""
kw_args: dict[str, Any] = {}
if max_length is not None:
kw_args["max_length"] = max_length
if truncation is not None:
kw_args["truncation"] = truncation
if add_special_tokens is not None:
kw_args["add_special_tokens"] = add_special_tokens
return tokenizer.encode(text, **kw_args)
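
# Usage sketch (`tok` is a placeholder for any `TokenizerLike` instance;
# `some_long_text` is a hypothetical input string):
#
#     ids = encode_tokens(tok, "Hello, world!", add_special_tokens=False)
#     ids = encode_tokens(tok, some_long_text, truncation=True, max_length=512)
#
# As with `decode_tokens`, None-valued options are dropped rather than
# forwarded, so each backend applies its own defaults.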