init src 0.9.2

2026-01-09 15:09:53 +08:00
parent 0eb2c0a4b3
commit 41d98d4359
1438 changed files with 417605 additions and 683 deletions
--- a/vllm/transformers_utils/tokenizers/init.py
+++ b/vllm/transformers_utils/tokenizers/init.py
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from .mistral import (MistralTokenizer, maybe_serialize_tool_calls,
+                      truncate_tool_call_ids, validate_request_params)
+from vllm.transformers_utils.tokenizers.cpm_9g import CPM9GTokenizer
+
+# Public names re-exported by this package: the Mistral tokenizer wrapper and
+# its request helpers, plus the CPM9G tokenizer.
+__all__ = [
+    "MistralTokenizer", "maybe_serialize_tool_calls", "truncate_tool_call_ids",
+    "validate_request_params",
+    "CPM9GTokenizer"
+]
--- a/vllm/transformers_utils/tokenizers/cpm_9g.py
+++ b/vllm/transformers_utils/tokenizers/cpm_9g.py
@@ -0,0 +1,483 @@
+import io
+import json
+import os
+from shutil import copyfile
+from typing import Any, Dict, IO, List, Optional, Tuple
+
+# import pkg_resources
+import sentencepiece as spm
+from pytrie import StringTrie
+from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
+from transformers.utils import logging
+
+# Module-level logger obtained from the transformers logging helper.
+logger = logging.get_logger(__name__)
+
+# File name of the vocabulary, keyed by the constructor argument it feeds.
+VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
+
+# No pretrained vocabulary/tokenizer files are registered for this tokenizer.
+PRETRAINED_VOCAB_FILES_MAP = {
+    "vocab_file": {},
+    "tokenizer_file": {},
+}
+# No per-checkpoint maximum input sizes are registered either.
+PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
+
+
+class CPM9GTokenizer(PreTrainedTokenizer):
+    """
+    Tokenizer for CPM9G models.
+
+    Text is split by greedy longest-prefix matching against the vocabulary
+    (see `get_piece`). Pieces that are not vocabulary entries are encoded as
+    their UTF-8 bytes through the ``<0x00>`` .. ``<0xFF>`` byte tokens.
+
+    The unknown/BOS/EOS tokens and the 256 byte tokens are stored in a
+    separate table (``_special_encoder``) so that the longest-prefix matcher
+    never produces them from raw text.
+
+    Args:
+        vocab_file (str, optional): Path to the vocabulary file, or to a
+            directory that contains ``vocab.txt``. When omitted, ``vocab.txt``
+            is opened relative to the current working directory.
+        unk_token (str): Unknown token.
+        bos_token (str): Beginning-of-sequence token.
+        eos_token (str): End-of-sequence token.
+        pad_token (str, optional): Padding token.
+        sp_model_kwargs (dict, optional): Stored on the instance and
+            forwarded to the base class; not otherwise used here.
+        add_bos_token (bool): Whether `build_inputs_with_special_tokens`
+            prepends the BOS id.
+        add_eos_token (bool): Whether `build_inputs_with_special_tokens`
+            appends the EOS id.
+        clean_up_tokenization_spaces (bool): Forwarded to the base class.
+    """
+
+    vocab_files_names = VOCAB_FILES_NAMES
+    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+    model_input_names = ["input_ids", "attention_mask"]
+
+    def __init__(
+        self,
+        vocab_file: Optional[str] = None,
+        unk_token: str = "<unk>",
+        bos_token: str = "<s>",
+        eos_token: str = "</s>",
+        pad_token: Optional[str] = None,
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token: bool = True,
+        add_eos_token: bool = False,
+        clean_up_tokenization_spaces: bool = False,
+        **kwargs,
+    ):
+        self.sp_model_kwargs = sp_model_kwargs or {}
+        self.vocab_file = vocab_file
+        self.add_bos_token = add_bos_token
+        self.add_eos_token = add_eos_token
+
+        self.unk_token = unk_token
+        self.bos_token = bos_token
+        self.eos_token = eos_token
+        self.pad_token = pad_token
+
+        # "<0x00>" .. "<0xFF>": one token per possible byte value.
+        self.byte_list: List[str] = [f"<0x{i:02X}>" for i in range(0x100)]
+        self._byte_token_to_value: Dict[str, int] = {
+            token: value for value, token in enumerate(self.byte_list)
+        }
+
+        self._special_token_set = set([self.unk_token, self.bos_token, self.eos_token] + self.byte_list)
+
+        # Resolve the path for every form of `vocab_file` (None, directory or
+        # file) and close the handle deterministically once it has been read.
+        with open(self._resolve_vocab_path(vocab_file), "rb") as fp:
+            all_tokens = self.load_vocab(fp)
+
+        # Split the vocabulary into regular tokens (used for text matching)
+        # and special tokens (unk/bos/eos and the byte tokens).
+        self.encoder: Dict[str, int] = {}
+        self._special_encoder: Dict[str, int] = {}
+        for token, token_id in all_tokens.items():
+            if token in self._special_token_set:
+                self._special_encoder[token] = token_id
+            else:
+                self.encoder[token] = token_id
+
+        self.decoder = {v: k for k, v in self.encoder.items()}
+        self._special_decoder = {v: k for k, v in self._special_encoder.items()}
+        # Token id of each byte token -> the byte value it stands for.
+        self._byte_decoder = {self._special_encoder[token]: i for i, token in enumerate(self.byte_list)}
+
+        self._max_word_len = max(map(len, self.encoder), default=0)
+
+        # For every first character, the length of the longest vocabulary
+        # entry starting with it; bounds the search window in `get_piece`.
+        self._len_word_first: Dict[str, int] = {}
+        for word in self.encoder:
+            first_char = word[0]
+            if len(word) > self._len_word_first.get(first_char, 0):
+                self._len_word_first[first_char] = len(word)
+        self.tencoder = StringTrie(self.encoder)
+
+        self._max_token_id = self.vocab_size - 1
+
+        super().__init__(
+            bos_token=AddedToken(bos_token, lstrip=False, rstrip=False),
+            eos_token=AddedToken(eos_token, lstrip=False, rstrip=False),
+            unk_token=AddedToken(unk_token, lstrip=False, rstrip=False),
+            pad_token=AddedToken(pad_token, lstrip=False, rstrip=False) if pad_token else None,
+            add_bos_token=add_bos_token,
+            add_eos_token=add_eos_token,
+            sp_model_kwargs=self.sp_model_kwargs,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            **kwargs,
+        )
+
+    @staticmethod
+    def _resolve_vocab_path(vocab_file: Optional[str]) -> str:
+        """Return the path of the vocabulary file for a `vocab_file` argument.
+
+        ``None``/empty maps to ``vocab.txt`` in the working directory, a
+        directory maps to ``<directory>/vocab.txt`` and anything else is
+        taken to be the vocabulary file itself.
+        """
+        file_name = VOCAB_FILES_NAMES["vocab_file"]
+        if not vocab_file:
+            return file_name
+        if os.path.isdir(vocab_file):
+            return os.path.join(vocab_file, file_name)
+        return vocab_file
+
+    def __getstate__(self) -> Dict[str, Any]:
+        """Return a shallow copy of the instance dictionary for pickling."""
+        return self.__dict__.copy()
+
+    def __setstate__(self, d: Dict[str, Any]) -> None:
+        """Restore the instance dictionary produced by `__getstate__`."""
+        self.__dict__ = d
+
+    def load_vocab(self, fp: IO[bytes]) -> Dict[str, int]:
+        """
+        Load a vocabulary file into a dictionary.
+
+        Every non-blank line holds one JSON-encoded token; a token's id is
+        the number of distinct tokens read before it.
+
+        Args:
+            fp (IO[bytes]): Binary file object of the vocabulary file.
+
+        Returns:
+            Dict[str, int]: Mapping from token to token id.
+        """
+        vocab: Dict[str, int] = {}
+        reader = io.TextIOWrapper(fp, encoding="utf-8")
+        for line in reader:
+            line = line.strip()
+            if not line:
+                continue
+            vocab[json.loads(line)] = len(vocab)
+        return vocab
+
+    @property
+    def vocab_size(self) -> int:
+        """Number of regular plus special tokens in the vocabulary."""
+        return len(self.encoder) + len(self._special_encoder)
+
+    @property
+    def max_token_id(self) -> int:
+        """Largest token id, cached at construction as `vocab_size - 1`."""
+        return self._max_token_id
+
+    @property
+    def eos_id(self):
+        """Vocabulary id of the end-of-sequence token."""
+        return self._special_encoder[self.eos_token]
+
+    @property
+    def bos_id(self):
+        """Vocabulary id of the beginning-of-sequence token."""
+        return self._special_encoder[self.bos_token]
+
+    @property
+    def unk_id(self):
+        """Vocabulary id of the unknown token."""
+        return self._special_encoder[self.unk_token]
+
+    def get_vocab(self) -> Dict[str, int]:
+        """Return the vocabulary (including added tokens) as a dictionary."""
+        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
+        vocab.update(self.added_tokens_encoder)
+        return vocab
+
+    def _tokenize(self, text: str) -> List[str]:
+        """Split `text` into pieces by repeated longest-prefix matching."""
+        output_tokens: List[str] = []
+        st = 0
+        while st < len(text):
+            piece = self.get_piece(text[st:])
+            output_tokens.append(piece)
+            st += len(piece)
+        return output_tokens
+
+    def _convert_token_to_id(self, token: str) -> int:
+        """Convert a token string to its id.
+
+        Both the regular and the special (unk/bos/eos/byte) tables are
+        consulted; anything else maps to the unknown id.
+        """
+        token_id = self.encoder.get(token)
+        if token_id is None:
+            token_id = self._special_encoder.get(token, self.unk_id)
+        return token_id
+
+    def _convert_id_to_token(self, index: int) -> str:
+        """Convert an id to its token string.
+
+        Both the regular and the special (unk/bos/eos/byte) tables are
+        consulted; anything else maps to the unknown token.
+        """
+        token = self.decoder.get(index)
+        if token is None:
+            token = self._special_decoder.get(index, self.unk_token)
+        return token
+
+    def convert_tokens_to_string(self, tokens: List[str]) -> str:
+        """Join a sequence of token strings into a single string.
+
+        Runs of byte tokens (``<0xNN>``) are reassembled into bytes and
+        decoded as UTF-8; invalid or incomplete sequences yield U+FFFD.
+        Every other token is appended verbatim.
+        """
+        pieces: List[str] = []
+        pending = bytearray()
+        for token in tokens:
+            byte_value = self._byte_token_to_value.get(token)
+            if byte_value is not None:
+                pending.append(byte_value)
+                continue
+            if pending:
+                pieces.append(pending.decode("utf-8", errors="replace"))
+                pending = bytearray()
+            pieces.append(token)
+        if pending:
+            pieces.append(pending.decode("utf-8", errors="replace"))
+        return "".join(pieces)
+
+    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
+        """
+        Save the vocabulary file into a directory.
+
+        The original vocabulary file is copied when it still exists;
+        otherwise the file is regenerated from the in-memory tables in the
+        format read by `load_vocab` (one JSON string per line, ordered by id).
+
+        Args:
+            save_directory (str): Existing directory to write into.
+            filename_prefix (str, optional): Prefix joined to the file name
+                with ``-``.
+
+        Returns:
+            Tuple[str]: One-element tuple with the path of the written file.
+
+        Raises:
+            ValueError: If `save_directory` is not a directory.
+        """
+        if not os.path.isdir(save_directory):
+            raise ValueError(f"Vocabulary path ({save_directory}) should be a directory")
+
+        out_vocab_file = os.path.join(
+            save_directory,
+            (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"],
+        )
+
+        source_file = self._resolve_vocab_path(self.vocab_file)
+        if os.path.isfile(source_file):
+            # Nothing to do when saving over the file we loaded from.
+            if os.path.abspath(source_file) != os.path.abspath(out_vocab_file):
+                copyfile(source_file, out_vocab_file)
+        else:
+            id_to_token = {**self.decoder, **self._special_decoder}
+            with open(out_vocab_file, "w", encoding="utf-8") as fo:
+                for token_id in sorted(id_to_token):
+                    fo.write(json.dumps(id_to_token[token_id], ensure_ascii=False) + "\n")
+
+        return (out_vocab_file, )
+
+    def build_inputs_with_special_tokens(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        """Wrap one or two id sequences with BOS/EOS ids.
+
+        BOS is prepended when `add_bos_token` is set and EOS appended when
+        `add_eos_token` is set; a second sequence is wrapped the same way and
+        concatenated after the first.
+        """
+        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
+        eos_token_id = [self.eos_token_id] if self.add_eos_token else []
+
+        output = bos_token_id + token_ids_0 + eos_token_id
+
+        if token_ids_1 is not None:
+            output = output + bos_token_id + token_ids_1 + eos_token_id
+
+        return output
+
+    def get_special_tokens_mask(
+        self,
+        token_ids_0: List[int],
+        token_ids_1: Optional[List[int]] = None,
+        already_has_special_tokens: bool = False
+    ) -> List[int]:
+        """
+        Return a mask marking where special tokens are (or would be) placed.
+
+        Args:
+            token_ids_0 (List[int]): First id sequence.
+            token_ids_1 (List[int], optional): Second id sequence of a pair.
+            already_has_special_tokens (bool, optional, defaults to False):
+                Whether the sequences already contain special tokens; in that
+                case the base-class implementation is used.
+
+        Returns:
+            List[int]: 1 for a special token, 0 for a sequence token.
+        """
+        if already_has_special_tokens:
+            return super().get_special_tokens_mask(
+                token_ids_0=token_ids_0,
+                token_ids_1=token_ids_1,
+                already_has_special_tokens=True,
+            )
+
+        bos_token_id = [1] if self.add_bos_token else []
+        eos_token_id = [1] if self.add_eos_token else []
+
+        if token_ids_1 is None:
+            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
+        return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id + bos_token_id + ([0] * len(token_ids_1)) + eos_token_id
+
+    def create_token_type_ids_from_sequences(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        """
+        Build token type ids for one sequence or a pair of sequences.
+
+        Args:
+            token_ids_0 (List[int]): First id sequence.
+            token_ids_1 (List[int], optional): Second id sequence of a pair.
+
+        Returns:
+            List[int]: 0 for every position of the first sequence (including
+            its BOS/EOS) and 1 for every position of the second one.
+        """
+        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
+        eos_token_id = [self.eos_token_id] if self.add_eos_token else []
+
+        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
+
+        if token_ids_1 is not None:
+            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
+
+        return output
+
+    def get_piece(self, text: str) -> str:
+        """
+        Return the longest vocabulary entry that is a prefix of `text`.
+
+        Args:
+            text (str): Non-empty input text.
+
+        Returns:
+            str: The matched prefix, or the first character of `text` when
+            no vocabulary entry matches.
+        """
+        limit = self._len_word_first.get(text[0])
+        if limit is not None:
+            window = text[:limit]
+            # Try the longest candidate first and shrink one char at a time.
+            for end in range(len(window), 0, -1):
+                candidate = window[:end]
+                if candidate in self.encoder:
+                    return candidate
+        return text[0]
+
+    def encode(self, text: str) -> List[int]:
+        """
+        Encode text into a list of ids (no BOS/EOS is added).
+
+        Args:
+            text (str): Input text.
+
+        Returns:
+            List[int]: Ids of the matched pieces; pieces missing from the
+            vocabulary are expanded to their UTF-8 byte-token ids.
+        """
+        ret: List[int] = []
+        for piece in self._tokenize(text):
+            piece_id = self.encoder.get(piece)
+            if piece_id is None:
+                ret.extend(self._encode_unicode(piece))
+            else:
+                ret.append(piece_id)
+        return ret
+
+    def _decode_ids(self, tokens, keep_str_items: bool) -> str:
+        """Shared implementation of `decode` and `decode_all`.
+
+        Consecutive byte-token ids are collected and decoded together as
+        UTF-8 (invalid or incomplete sequences yield U+FFFD), so adjacent
+        multi-byte characters are not split at arbitrary positions.
+        """
+        pieces: List[str] = []
+        pending = bytearray()
+
+        for token in tokens:
+            byte_value = self._byte_decoder.get(token)
+            if byte_value is not None:
+                pending.append(byte_value)
+                continue
+            if pending:
+                pieces.append(pending.decode("utf-8", errors="replace"))
+                pending = bytearray()
+
+            if keep_str_items and isinstance(token, str):
+                pieces.append(token)
+            elif token in self.decoder:
+                pieces.append(self.decoder[token])
+            elif token == self.eos_id:
+                pieces.append(self.eos_token)
+            elif token == self.bos_id:
+                pieces.append(self.bos_token)
+            else:
+                pieces.append(self.unk_token)
+
+        if pending:
+            pieces.append(pending.decode("utf-8", errors="replace"))
+        return "".join(pieces)
+
+    def decode_all(self, tokens: List[int]):
+        """Decode ids into a string; unknown ids become the unknown token."""
+        return self._decode_ids(tokens, keep_str_items=False)
+
+    def decode(self, tokens: List[int]) -> str:
+        """
+        Decode a list of ids into a string.
+
+        Items that are already strings are passed through unchanged, which
+        keeps the previous behavior for callers that hand in token strings.
+
+        Args:
+            tokens (List[int]): Ids to decode.
+
+        Returns:
+            str: The decoded string.
+        """
+        return self._decode_ids(tokens, keep_str_items=True)
+
+    def _encode_unicode(self, token: str) -> List[int]:
+        """
+        Encode a token as the ids of its UTF-8 byte tokens.
+
+        Args:
+            token (str): Token to encode.
+
+        Returns:
+            List[int]: One byte-token id per UTF-8 byte of `token`.
+        """
+        return [
+            self._special_encoder[self.byte_list[byte]]
+            for byte in token.encode("utf-8")
+        ]
+
+    def next_token(self, text: str) -> Tuple[str, List[int]]:
+        """
+        Return the next token of `text` using the prefix trie.
+
+        Args:
+            text (str): Input text.
+
+        Returns:
+            Tuple[str, List[int]]: The longest vocabulary prefix and its id,
+            or the first character and its byte-token ids when nothing
+            matches.
+        """
+        token, token_id = self.tencoder.longest_prefix_item(text, (None, None))
+        if token is None:
+            token = text[0]
+            token_ids = self._encode_unicode(token)
+        else:
+            token_ids = [token_id]
+        return token, token_ids
--- a/vllm/transformers_utils/tokenizers/mistral.py
+++ b/vllm/transformers_utils/tokenizers/mistral.py
@@ -0,0 +1,493 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
+
+import huggingface_hub
+import regex as re
+from huggingface_hub import HfApi, hf_hub_download
+
+from vllm.logger import init_logger
+from vllm.transformers_utils.tokenizer_base import TokenizerBase
+from vllm.utils import is_list_of
+
+if TYPE_CHECKING:
+    # make sure `mistral_common` is lazy imported,
+    # so that users who only use non-mistral models
+    # will not be bothered by the dependency.
+    from mistral_common.protocol.instruct.request import ChatCompletionRequest
+    from mistral_common.tokens.tokenizers.mistral import (
+        MistralTokenizer as PublicMistralTokenizer)
+
+    from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class Encoding:
+    """Minimal result container returned by `MistralTokenizer.__call__`."""
+    # Token ids of a single prompt (`list[int]`) or of a batch of prompts
+    # (`list[list[int]]`).
+    input_ids: Union[list[int], list[list[int]]]
+
+
+def maybe_serialize_tool_calls(request: "ChatCompletionRequest"):
+    """Materialize the `tool_calls` of every assistant message as a list.
+
+    Each assistant message in `request.messages` gets its `tool_calls`
+    replaced, in place, by a plain list holding the items of the original
+    iterable. A missing or `None` value becomes an empty list. Messages with
+    any other role are left untouched.
+    """
+    # SEE: https://github.com/vllm-project/vllm/pull/9951
+    # Credits go to: @gcalmettes
+    # NOTE: There is currently a bug in pydantic where attributes
+    # declared as iterables are replaced in the instances by a
+    # pydantic-core ValidatorIterator instance. In particular, this
+    # affects tool_calls defined in ChatCompletionAssistantMessageParam
+    # model:
+    # see:
+    #   - https://github.com/pydantic/pydantic/issues/9467
+    # As a result, tool_calls from assistant messages are never
+    # deserialized in the request object if the tool_calls iterator is
+    # not consumed. This affects messages passed to the MistralTokenizer
+    # since no chat template is applied and therefore the tool_calls
+    # iterator is not directly consumed.
+    # Issue is tracked on Pydantic side, with resolution planned for
+    # v2.11 release. In the meantime, the official workaround is to
+    # consume the iterator so the tool_calls are correctly deserialized
+    # in the OpenAI ChatCompletionAssistantMessageParam object
+    # https://github.com/pydantic/pydantic/issues/9467#issuecomment-2442097291 # noqa: E501
+    # Official Pydantic Issues:
+    #   - https://github.com/pydantic/pydantic/issues/9541
+    # TODO: remove when pydantic v2.11 is released
+    for message in request.messages:
+        if message.get("role") != 'assistant':
+            continue
+        tool_calls = message.get("tool_calls")
+        # `list()` accepts both a lazy iterator and an already-built list,
+        # whereas calling `next()` directly only works on iterators.
+        message["tool_calls"] = ([] if tool_calls is None else
+                                 list(tool_calls))
+
+
+def truncate_tool_call_ids(request: "ChatCompletionRequest"):
+    """Shorten tool call ids in `request.messages` to their last 9 characters.
+
+    Assistant messages have the `id` of each tool call shortened (and always
+    end up with a `tool_calls` entry, empty if none was present). Messages
+    with role `tool` or `tool_results` have their `tool_call_id` shortened
+    when that key exists. Every truncation is logged as a warning.
+    """
+    for idx, msg in enumerate(request.messages):
+        role = msg.get("role")
+
+        if role == 'assistant':
+            calls = msg.get("tool_calls", [])
+            for call in calls:
+                call_id = call["id"]
+                if len(call_id) > 9:
+                    shortened = call_id[-9:]
+                    logger.warning(
+                        "Truncating tool call ID: %s to %s",
+                        call_id,
+                        shortened,
+                    )
+                    call["id"] = shortened
+            request.messages[idx]["tool_calls"] = calls
+
+        elif role in {"tool_results", "tool"} and "tool_call_id" in msg:
+            call_id = msg["tool_call_id"]
+            if len(call_id) > 9:
+                logger.warning(
+                    "Truncating tool_call_id: %s to %s",
+                    call_id,
+                    call_id[-9:],
+                )
+                call_id = call_id[-9:]
+            request.messages[idx]["tool_call_id"] = call_id
+
+
+def validate_request_params(request: "ChatCompletionRequest"):
+    """Reject requests that explicitly disable `skip_special_tokens`.
+
+    Raises:
+        ValueError: If `request.skip_special_tokens` is set to a falsy value
+            other than `None`.
+    """
+    skip_special_tokens = request.skip_special_tokens
+    if skip_special_tokens is None or skip_special_tokens:
+        return
+    raise ValueError("skip_special_tokens=False is not supported "
+                     "for Mistral tokenizers.")
+
+
+def list_local_repo_files(repo_id: str, revision: Optional[str]) -> list[str]:
+    """List the file names of a repo snapshot in the local HF Hub cache.
+
+    When `revision` is `None`, it is read from the cached `refs/main` file
+    if that file exists. Returns an empty list when no revision can be
+    determined or its snapshot directory does not exist.
+    """
+    cache_folder_name = huggingface_hub.constants.REPO_ID_SEPARATOR.join(
+        ["models", *repo_id.split("/")])
+    repo_cache = os.path.join(huggingface_hub.constants.HF_HUB_CACHE,
+                              cache_folder_name)
+
+    if revision is None:
+        main_ref = os.path.join(repo_cache, "refs", "main")
+        if os.path.isfile(main_ref):
+            with open(main_ref) as file:
+                revision = file.read()
+
+    if not revision:
+        return []
+
+    snapshot_dir = os.path.join(repo_cache, "snapshots", revision)
+    return os.listdir(snapshot_dir) if os.path.isdir(snapshot_dir) else []
+
+
+def find_tokenizer_file(files: list[str]):
+    """Return the single Mistral tokenizer file name contained in `files`.
+
+    A name qualifies when it is `tekken.json` or starts with
+    `tokenizer.model.v` / `tokenizer.mm.model.v`.
+
+    Raises:
+        OSError: If no name or more than one name qualifies.
+    """
+    file_pattern = re.compile(
+        r"^tokenizer\.model\.v.*$|^tekken\.json$|^tokenizer\.mm\.model\.v.*$")
+
+    matched_files = list(filter(file_pattern.match, files))
+    count = len(matched_files)
+    if count == 1:
+        return matched_files[0]
+
+    # The two failure cases share one message and differ only in this hint.
+    hint = "only one" if count > 1 else "that a"
+    raise OSError(
+        f"Found {count} files matching the "
+        f"pattern: `{file_pattern.pattern}`. Make sure {hint} Mistral "
+        f"tokenizer is present in {files}.")
+
+
+def make_mistral_chat_completion_request(
+        messages: list["ChatCompletionMessageParam"],
+        tools: Optional[list[dict[str,
+                                  Any]]] = None) -> "ChatCompletionRequest":
+    """Build a mistral-common `ChatCompletionRequest` from chat messages.
+
+    The given `messages` and `tools` are adjusted in place before the request
+    is constructed:
+
+    - a trailing assistant message is flagged with `prefix=True`;
+    - `reasoning_content` is removed from every message;
+    - list-valued content of assistant/tool messages is flattened into a
+      newline-joined string of the chunks' `text` values;
+    - function tools without `parameters` get an empty dict.
+    """
+    # NOTE(review): `messages[-1]` assumes at least one message — confirm
+    # that callers never pass an empty list.
+    last_message = cast(dict[str, Any], messages[-1])
+    if last_message["role"] == "assistant":
+        last_message["prefix"] = True
+
+    # mistral-common requires AssistantMessage content to be string [1].
+    #
+    # [1]: https://github.com/mistralai/mistral-common/blob/f4a06998b75ed78bbf5aaf569590b772ea26c9f6/src/mistral_common/protocol/instruct/messages.py#L80
+    for message in messages:
+        # Remove reasoning_content as unsupported by Mistral
+        _ = message.pop("reasoning_content", None)  # type: ignore
+
+        # Convert list text content to string
+        if message.get("role") in ("assistant", "tool"):
+            content = message.get("content")
+            if isinstance(content, list):
+                # NOTE(review): a chunk without a "text" key yields None here
+                # and makes `join` raise TypeError — presumably only text
+                # chunks reach this point; verify against callers.
+                content = "\n".join(chunk.get("text") for chunk in content)
+                message["content"] = content
+
+    # The Mistral client, in comparison to the OpenAI client, requires the
+    # "parameters" dict to be present, even if it's empty.
+    if tools:
+        for function in [
+                tool["function"] for tool in tools
+                if tool["type"] == "function"
+        ]:
+            if function.get("parameters") is None:
+                function["parameters"] = {}
+
+    # Imported lazily so that mistral_common is only required when used.
+    from mistral_common.protocol.instruct.request import ChatCompletionRequest
+    return ChatCompletionRequest(messages=messages,
+                                 tools=tools)  # type: ignore[type-var]
+
+
+class MistralTokenizer(TokenizerBase):
+
+    def __init__(self, tokenizer: "PublicMistralTokenizer") -> None:
+        """Wrap a mistral-common tokenizer.
+
+        Stores the tokenizer, its instruct tokenizer and the underlying
+        Tekken/SentencePiece tokenizer, parses the numeric tokenizer version
+        and caches the vocabulary.
+
+        Raises:
+            TypeError: If the underlying tokenizer is neither a `Tekkenizer`
+                nor a `SentencePieceTokenizer`.
+        """
+        self.mistral = tokenizer
+        self.instruct = tokenizer.instruct_tokenizer
+        # The version value is split on "v" and the last part parsed as int.
+        _mistral_version_str = self.instruct.tokenizer.version.value
+        self.version: int = int(_mistral_version_str.split("v")[-1])
+
+        tokenizer_ = tokenizer.instruct_tokenizer.tokenizer
+        from mistral_common.tokens.tokenizers.tekken import (
+            SpecialTokenPolicy, Tekkenizer)
+        self.is_tekken = isinstance(tokenizer_, Tekkenizer)
+        from mistral_common.tokens.tokenizers.sentencepiece import (
+            SentencePieceTokenizer)
+        self.is_spm = isinstance(tokenizer_, SentencePieceTokenizer)
+        if self.is_tekken:
+            # Make sure special tokens will not raise
+            tokenizer_.special_token_policy = SpecialTokenPolicy.IGNORE
+        elif self.is_spm:
+            pass
+        else:
+            raise TypeError(f"Unsupported tokenizer: {type(tokenizer_)}")
+
+        self._vocab = tokenizer_.vocab()
+        # Convert to a dict[str, int] to match protocol, but this is a lossy
+        # conversion. There may be multiple token ids that decode to the same
+        # string due to partial UTF-8 byte sequences being converted to the
+        # Unicode replacement character (U+FFFD).
+        self._vocab_dict = {
+            token: idx
+            for idx, token in enumerate(self._vocab)
+        }
+        self.tokenizer = tokenizer_
+        # Cached once; exposed through the `max_token_id` property.
+        self._max_token_id = self.vocab_size - 1
+
+    @classmethod
+    def from_pretrained(cls,
+                        path_or_repo_id: str,
+                        *,
+                        revision: Optional[str] = None) -> "MistralTokenizer":
+        """Create a tokenizer from a local path or a HF Hub repo id.
+
+        Args:
+            path_or_repo_id: A tokenizer file, a directory containing exactly
+                one Mistral tokenizer file, or an `org/name` repo id that is
+                used when no such local path exists.
+            revision: Hub revision, only used for the repo id case.
+
+        Raises:
+            AssertionError: If the path does not exist and is not of the
+                form `org/name`, or if it exists but is neither a directory
+                nor a file.
+        """
+        local_path = Path(path_or_repo_id)
+        if not local_path.exists():
+            # The message must be an f-string, otherwise the placeholder is
+            # printed literally instead of the offending value.
+            assert len(path_or_repo_id.split("/")) == 2, (
+                "You have either provided a non-existent path: "
+                f"{path_or_repo_id} or an invalid HF Hub repo id.")
+            tokenizer_file = cls._download_mistral_tokenizer_from_hf(
+                path_or_repo_id, revision)
+        elif local_path.is_dir():
+            tokenizer_file_name = find_tokenizer_file(
+                os.listdir(path_or_repo_id))
+            tokenizer_file = str(local_path / tokenizer_file_name)
+        else:
+            assert local_path.is_file(), f"Invalid path: {path_or_repo_id}"
+            tokenizer_file = str(local_path)
+
+        # Imported lazily so that mistral_common is only required when used.
+        from mistral_common.tokens.tokenizers.mistral import (
+            MistralTokenizer as PublicMistralTokenizer)
+        mistral_tokenizer = PublicMistralTokenizer.from_file(tokenizer_file)
+        return cls(mistral_tokenizer)
+
+    @staticmethod
+    def _download_mistral_tokenizer_from_hf(tokenizer_name: str,
+                                            revision: Optional[str]) -> str:
+        """Download the tokenizer file of a Hub repo and return its path.
+
+        The repo's file list is fetched from the Hub. On `ConnectionError`
+        the locally cached file list is used instead, and the error is
+        re-raised when that list is empty.
+        """
+        try:
+            files = HfApi().list_repo_files(repo_id=tokenizer_name,
+                                            revision=revision)
+        except ConnectionError as exc:
+            files = list_local_repo_files(repo_id=tokenizer_name,
+                                          revision=revision)
+            if not files:
+                raise exc
+
+        return hf_hub_download(tokenizer_name,
+                               filename=find_tokenizer_file(files),
+                               revision=revision)
+
+    # the following attributes are set to fit vLLM's design and are used
+    # by the guided structured output backends.
+    @property
+    def all_special_tokens_extended(self) -> list[str]:
+        """Special tokens as strings.
+
+        Uses the wrapped tokenizer's `SPECIAL_TOKENS` when it defines that
+        attribute (tekken does), otherwise the members of `SpecialTokens`;
+        enum members are replaced by their `.value`.
+        """
+        from mistral_common.tokens.tokenizers.base import SpecialTokens
+
+        candidates = (self.tokenizer.SPECIAL_TOKENS if hasattr(
+            self.tokenizer, "SPECIAL_TOKENS") else list(SpecialTokens))
+
+        result = []
+        for item in candidates:
+            result.append(
+                item.value if isinstance(item, SpecialTokens) else item)
+        return result
+
+    @property
+    def all_special_tokens(self) -> list[str]:
+        """Alias of `all_special_tokens_extended`."""
+        return self.all_special_tokens_extended
+
+    @property
+    def all_special_ids(self) -> list[int]:
+        """Position of each special token within `all_special_tokens`.
+
+        For every token, this is the index of its first occurrence in that
+        list.
+        """
+        # NOTE(review): these are list positions — presumably they coincide
+        # with the token ids of the wrapped tokenizer; confirm.
+        # Evaluate the property once instead of once per element.
+        special_tokens = self.all_special_tokens
+        return [special_tokens.index(t) for t in special_tokens]
+
+    @property
+    def bos_token_id(self) -> int:
+        """Beginning-of-sequence id reported by the wrapped tokenizer."""
+        return self.tokenizer.bos_id
+
+    @property
+    def eos_token_id(self) -> int:
+        """End-of-sequence id reported by the wrapped tokenizer."""
+        return self.tokenizer.eos_id
+
+    @property
+    def sep_token(self) -> str:
+        """Not supported; always raises `NotImplementedError`."""
+        raise NotImplementedError()
+
+    @property
+    def pad_token(self) -> str:
+        """Not supported; always raises `NotImplementedError`."""
+        raise NotImplementedError()
+
+    @property
+    def is_fast(self) -> bool:
+        """Always reports `True`."""
+        return True
+
+    @property
+    def vocab_size(self) -> int:
+        """Number of entries in the cached vocabulary."""
+        return len(self._vocab)
+
+    @property
+    def max_token_id(self) -> int:
+        """Largest token id, cached in `__init__` as `vocab_size - 1`."""
+        return self._max_token_id
+
+    def __len__(self) -> int:
+        """Return the vocabulary size."""
+        return self.vocab_size
+
+    def __call__(
+        self,
+        text: Union[str, list[str], list[int]],
+        text_pair: Optional[str] = None,
+        add_special_tokens: bool = False,
+        truncation: bool = False,
+        max_length: Optional[int] = None,
+    ):
+        """Tokenize `text` and wrap the ids in an `Encoding`.
+
+        A list of strings is encoded prompt by prompt, a list of ints is
+        taken as already tokenized and returned as is, and anything else is
+        encoded as a single prompt. `text_pair` and `add_special_tokens` are
+        accepted but not used by this method.
+        """
+        if is_list_of(text, str):
+            # Batch of prompt texts: one id list per prompt.
+            return Encoding(input_ids=[
+                self.encode_one(prompt, truncation, max_length)
+                for prompt in text
+            ])
+
+        if is_list_of(text, int):
+            # Output of the chat template: already token ids.
+            return Encoding(input_ids=text)
+
+        # Single prompt text.
+        return Encoding(
+            input_ids=self.encode_one(text, truncation, max_length))
+
+    def get_vocab(self) -> dict[str, int]:
+        """Return the token-to-id dictionary built in `__init__`."""
+        # NB: the dictionary form of the vocabulary collapses token ids that map
+        # to the same string but have different bytes
+        return self._vocab_dict
+
+    def get_added_vocab(self) -> dict[str, int]:
+        """Return an empty dictionary."""
+        # Mistral tokenizers have no added vocabulary
+        return {}
+
+    def encode_one(
+        self,
+        text: str,
+        truncation: bool = False,
+        max_length: Optional[int] = None,
+    ) -> list[int]:
+        """Encode one prompt string, optionally cutting it to ``max_length``.
+
+        The text is encoded through ``self.encode(text)`` with that method's
+        default arguments.
+
+        Args:
+            text: Prompt text to tokenize.
+            truncation: When truthy, keep only ``ids[:max_length]``.
+            max_length: Slice bound used when truncating; ``None`` keeps
+                every id.
+
+        Returns:
+            The (possibly truncated) list of token ids.
+        """
+        token_ids = self.encode(text)
+        return token_ids[:max_length] if truncation else token_ids
+
+    def encode(self,
+               text: str,
+               truncation: Optional[bool] = None,
+               max_length: Optional[int] = None,
+               add_special_tokens: Optional[bool] = None) -> list[int]:
+        """Encode prompt text into token ids.
+
+        `encode` should only be used for prompt completion; it should never
+        be used for chat completion. For chat completion use
+        `apply_chat_template`.
+
+        Args:
+            text: Prompt text to tokenize.
+            truncation: When truthy and ``max_length`` is given, the result
+                is cut to its first ``max_length`` ids.
+            max_length: Maximum number of ids to keep when truncating.
+            add_special_tokens: When not ``None``, passed as both ``bos``
+                and ``eos`` to the wrapped tokenizer. When ``None``, the
+                text is encoded with ``bos=True, eos=False``.
+
+        Returns:
+            The list of token ids.
+        """
+        if add_special_tokens is not None:
+            input_ids = self.tokenizer.encode(text,
+                                              bos=add_special_tokens,
+                                              eos=add_special_tokens)
+        else:
+            input_ids = self.tokenizer.encode(text, bos=True, eos=False)
+        # Honor the truncation arguments instead of silently dropping them.
+        if truncation and max_length is not None:
+            input_ids = input_ids[:max_length]
+        return input_ids
+
+    def apply_chat_template(self,
+                            messages: list["ChatCompletionMessageParam"],
+                            tools: Optional[list[dict[str, Any]]] = None,
+                            **kwargs) -> list[int]:
+        """Encode a chat conversation into prompt token ids.
+
+        Args:
+            messages: Chat messages to encode.
+            tools: Optional tool definitions forwarded with the messages.
+            **kwargs: Accepted but not read by this method.
+
+        Returns:
+            The ``tokens`` of the result of
+            ``self.mistral.encode_chat_completion``.
+        """
+        chat_request = make_mistral_chat_completion_request(messages, tools)
+        return self.mistral.encode_chat_completion(chat_request).tokens
+
+    def convert_tokens_to_string(self, tokens: list[str]) -> str:
+        """Join a sequence of token pieces back into one string.
+
+        Tekken tokenizers (``self.is_tekken``): special tokens are dropped,
+        except the tool-calls marker. If any remaining piece is ``bytes``,
+        every piece is mapped back to an id and the ids are decoded
+        together; otherwise the pieces are concatenated.
+
+        Other tokenizers: runs of regular tokens are decoded through the
+        wrapped tokenizer, while the tool-calls marker is kept verbatim.
+
+        Args:
+            tokens: Token pieces; for Tekken they may include ``bytes``
+                entries in addition to ``str``.
+
+        Returns:
+            The reconstructed text.
+        """
+        from mistral_common.tokens.tokenizers.base import SpecialTokens
+        if self.is_tekken:
+            # Keep the tool-calls marker, drop every other special token.
+            tokens = [
+                t for t in tokens
+                if (t is SpecialTokens.tool_calls
+                    or t not in self.tokenizer._all_special_tokens)
+            ]
+
+            if any(isinstance(t, bytes) for t in tokens):
+                # we need to encode and decode all tokens again
+                # Regular ids are offset by the number of special tokens.
+                shift = self.tokenizer.num_special_tokens
+
+                # Map one piece (str or bytes) to its id; pieces missing
+                # from the vocabulary are logged and become the unk id.
+                def _token_to_id(t: str):
+                    t_bytes = t.encode("utf-8") \
+                        if not isinstance(t, bytes) else t
+                    try:
+                        return shift + \
+                            self.tokenizer._tekken_token2id_nospecial[t_bytes]
+                    except KeyError:
+                        logger.warning(
+                            "Failed to convert token %s to id,"
+                            " replacing with <unk>", t_bytes)
+                        return self.tokenizer.unk_id
+
+                ids = [_token_to_id(t) for t in tokens]
+                decoded = self.tokenizer.decode(ids)
+            else:
+                # Only str pieces: plain concatenation is enough.
+                decoded = "".join(tokens)
+        else:
+            # make sure certain special tokens like Tool calls are
+            # not decoded
+            special_tokens = {SpecialTokens.tool_calls}
+            # Pending run of regular tokens not yet decoded.
+            regular_tokens: list[str] = []
+            decoded_list = []
+
+            for token in tokens:
+                if token in special_tokens:
+                    # Flush the pending run before emitting the marker.
+                    if regular_tokens:
+                        decoded_list.append(
+                            self.tokenizer.decode(regular_tokens))
+                        regular_tokens = []
+                    decoded_list.append(token)
+                else:
+                    regular_tokens.append(token)
+
+            # Flush whatever run is left after the last marker.
+            if regular_tokens:
+                decoded_list.append(
+                    self.tokenizer.decode(regular_tokens))  # type: ignore
+
+            decoded = ''.join(decoded_list)
+
+        return decoded
+
+    # WARN: Outlines logits processors can overwrite this method.
+    # See: guided_decoding/outlines_logits_processors.py::_adapt_tokenizer
+    # for more.
+    def decode(self,
+               ids: Union[list[int], int],
+               skip_special_tokens: bool = True) -> str:
+        """Decode one token id or a list of token ids into text.
+
+        Args:
+            ids: A single id or a list of ids; a single id is wrapped in a
+                one-element list before decoding.
+            skip_special_tokens: Must be truthy; ``False`` is rejected.
+
+        Returns:
+            The text produced by the wrapped tokenizer's ``decode``.
+
+        Raises:
+            AssertionError: if ``skip_special_tokens`` is falsy.
+        """
+        assert (
+            skip_special_tokens
+        ), "skip_special_tokens=False is not supported for Mistral tokenizers."
+        id_list = [ids] if isinstance(ids, int) else ids
+        return self.tokenizer.decode(id_list)
+
+    def convert_ids_to_tokens(
+        self,
+        ids: list[int],
+        skip_special_tokens: bool = True,
+    ) -> list[str]:
+        """Convert token ids to their token pieces.
+
+        For Tekken tokenizers, special-token ids are dropped first (the
+        tool-calls control token is kept). If any resulting piece contains
+        the Unicode replacement character, all pieces are returned as byte
+        pieces instead, because such a token holds an incomplete UTF-8
+        sequence.
+
+        Args:
+            ids: Token ids to convert.
+            skip_special_tokens: Must be truthy; ``False`` is rejected.
+
+        Returns:
+            One piece per retained id.
+
+        Raises:
+            AssertionError: if ``skip_special_tokens`` is falsy, or the
+                tokenizer is neither Tekken nor SentencePiece.
+        """
+        from mistral_common.tokens.tokenizers.base import SpecialTokens
+
+        # TODO(Patrick) - potentially allow special tokens to not be skipped
+        assert (
+            skip_special_tokens
+        ), "skip_special_tokens=False is not supported for Mistral tokenizers."
+
+        assert self.is_tekken or self.is_spm, type(self.tokenizer)
+
+        if self.is_tekken:
+            # skip special tokens except tool call
+            # Regular ids start AT num_special_tokens (they are computed
+            # as num_special_tokens + rank), so the bound is inclusive.
+            num_special = self.tokenizer.num_special_tokens
+            ids = [
+                i for i in ids if i >= num_special or i ==
+                self.tokenizer.get_control_token(SpecialTokens.tool_calls)
+            ]
+
+        tokens = [self.tokenizer.id_to_piece(token_id) for token_id in ids]
+
+        if self.is_tekken and any("\ufffd" in t for t in tokens):
+            # if a decoded token contains the replacement character, then the
+            # token has an incomplete UTF-8 character so we must use bytes
+            # See: https://github.com/vllm-project/vllm/pull/8640
+            #      https://github.com/vllm-project/vllm/pull/9625
+            # if the underlying tokenizer is sentencepiece, the pieces are
+            # returned as-is, replacement character included
+            tokens = [
+                self.tokenizer.id_to_byte_piece(token_id) for token_id in ids
+            ]
+
+        return tokens