Upgrade to vLLM 0.17.0 (corex v4.1 overlay)

2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions


@@ -7,9 +7,9 @@ from transformers import AutoTokenizer
 from vllm.entrypoints.chat_utils import ChatCompletionMessageParam

-from . import TokenizerLike
 from .deepseek_v32_encoding import encode_messages
 from .hf import HfTokenizer, get_cached_tokenizer
+from .protocol import TokenizerLike


 def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:


@@ -0,0 +1,67 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import unicodedata
from collections.abc import Collection, Set

from transformers import AutoTokenizer

from .hf import HfTokenizer, get_cached_tokenizer
from .protocol import TokenizerLike


def get_qwen_vl_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
    """
    The logic of adding image pad tokens should only be applied in
    `QwenVLProcessor`, so they are patched out here.

    The definition of the wrapped tokenizer can be found here:
    https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py
    """
    new_tokenizer = copy.copy(tokenizer)

    class TokenizerWithoutImagePad(tokenizer.__class__):  # type: ignore
        def tokenize(
            self,
            text: str,
            allowed_special: Set[str] | str = "all",
            disallowed_special: Collection[str] | str = (),
            **kwargs,
        ) -> list[bytes | str]:
            text = unicodedata.normalize("NFC", text)

            return [
                self.decoder[t]
                for t in self.tokenizer.encode(
                    text,
                    allowed_special=allowed_special,
                    disallowed_special=disallowed_special,
                )
            ]

        def _decode(
            self,
            token_ids: int | list[int],
            skip_special_tokens: bool = False,
            errors: str | None = None,
            **kwargs,
        ) -> str:
            if isinstance(token_ids, int):
                token_ids = [token_ids]

            return self.tokenizer.decode(
                token_ids,
                errors=errors or self.errors,
            )

    TokenizerWithoutImagePad.__name__ = (
        f"{tokenizer.__class__.__name__}WithoutImagePad"
    )

    new_tokenizer.__class__ = TokenizerWithoutImagePad
    return new_tokenizer


class QwenVLTokenizer(TokenizerLike):
    @classmethod
    def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
        tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)
        return get_cached_tokenizer(get_qwen_vl_tokenizer(tokenizer))
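
The wrapper above patches behavior by copying the tokenizer instance and swapping its __class__ for a dynamically created subclass, so the already-loaded vocabulary is reused and other instances of the original class stay untouched. Below is a minimal, runnable sketch of that same pattern on a toy class; Greeter and without_prefix are illustrative names, not part of vLLM.

import copy

class Greeter:
    def greet(self, name: str) -> str:
        # Unwanted prefix, standing in for the image pad tokens above.
        return f"<pad> Hello, {name}!"

def without_prefix(obj: Greeter) -> Greeter:
    # Copy the instance so the original stays usable, then swap in a
    # subclass that overrides only the method being patched.
    patched = copy.copy(obj)

    class GreeterWithoutPrefix(obj.__class__):
        def greet(self, name: str) -> str:
            return super().greet(name).removeprefix("<pad> ")

    # Rename the subclass the same way the diff does, so repr() and
    # error messages stay readable.
    GreeterWithoutPrefix.__name__ = f"{obj.__class__.__name__}WithoutPrefix"
    patched.__class__ = GreeterWithoutPrefix
    return patched

g = without_prefix(Greeter())
print(g.greet("Qwen"))   # Hello, Qwen!
print(type(g).__name__)  # GreeterWithoutPrefix

Subclassing obj.__class__ rather than monkey-patching the class itself keeps the patch scoped to the one copied instance.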


@@ -36,6 +36,7 @@ _VLLM_TOKENIZERS = {
     "grok2": ("grok2", "Grok2Tokenizer"),
     "hf": ("hf", "CachedHfTokenizer"),
     "mistral": ("mistral", "MistralTokenizer"),
+    "qwen_vl": ("qwen_vl", "QwenVLTokenizer"),
 }
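
The registry maps each tokenizer mode to a (submodule, class name) pair rather than to the class itself, which points at lazy importing: the class is only resolved when its mode is requested. Here is a runnable sketch of that lookup pattern, using stdlib classes so it runs anywhere; vLLM's actual loader and package layout may differ.

import importlib

# Same shape as _VLLM_TOKENIZERS: mode -> (module path, attribute name).
_REGISTRY = {
    "json": ("json", "JSONDecoder"),
    "csv": ("csv", "DictReader"),
}

def load_class(mode: str) -> type:
    # Import the module only when the mode is actually requested, so
    # optional tokenizer dependencies are not pulled in up front.
    module_name, class_name = _REGISTRY[mode]
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

print(load_class("json"))  # <class 'json.decoder.JSONDecoder'>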
@@ -165,6 +166,10 @@ def resolve_tokenizer_args(
     ):
         tokenizer_mode = "grok2"

+    # Model-specific tokenizers
+    if tokenizer_mode == "auto" and "/Qwen-VL" in str(tokenizer_name):
+        tokenizer_mode = "qwen_vl"
+
     # Fallback to HF tokenizer
     if tokenizer_mode == "auto":
         tokenizer_mode = "hf"