update
vllm/renderers/mistral.py (Normal file, +133)
@@ -0,0 +1,133 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from concurrent.futures import ThreadPoolExecutor
from typing import Any

from vllm.config import VllmConfig
from vllm.entrypoints.chat_utils import (
    ChatCompletionMessageParam,
    ConversationMessage,
    parse_chat_messages,
    parse_chat_messages_async,
)
from vllm.logger import init_logger
from vllm.tokenizers import cached_get_tokenizer
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.utils.async_utils import make_async

from .base import BaseRenderer
from .inputs import DictPrompt
from .inputs.preprocess import parse_dec_only_prompt
from .params import ChatParams

logger = init_logger(__name__)


def safe_apply_chat_template(
    tokenizer: MistralTokenizer,
    messages: list[ChatCompletionMessageParam],
    **kwargs,
) -> str | list[int]:
    from mistral_common.exceptions import MistralCommonException

    try:
        return tokenizer.apply_chat_template(messages, **kwargs)
    # mistral-common uses assert statements to stop processing of input
    # if input does not comply with the expected format.
    # We convert those assertion errors to ValueErrors so they can be
    # properly caught in the preprocessing_input step
    except (AssertionError, MistralCommonException) as e:
        raise ValueError(str(e)) from e

    # External library exceptions can sometimes occur despite the framework's
    # internal exception management capabilities.
    except Exception as e:
        # Log and report any library-related exceptions for further
        # investigation.
        logger.exception(
            "An error occurred in `mistral_common` while applying chat template"
        )
        raise ValueError(str(e)) from e
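
# Illustrative only (`bad_messages` is hypothetical, not from this diff): the
# narrowing above means callers need to handle a single exception type, e.g.:
#
#     try:
#         safe_apply_chat_template(tokenizer, bad_messages)
#     except ValueError:
#         ...  # surfaced to the client as an invalid-input error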


class MistralRenderer(BaseRenderer[MistralTokenizer]):
    @classmethod
    def from_config(  # type: ignore[override]
        cls,
        config: VllmConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "MistralRenderer":
        model_config = config.model_config
        if model_config.skip_tokenizer_init:
            tokenizer = None
        else:
            tokenizer = cached_get_tokenizer(
                tokenizer_cls=MistralTokenizer,
                **tokenizer_kwargs,
            )

        return cls(config, tokenizer)
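
    # Note: with skip_tokenizer_init the renderer is built with tokenizer=None;
    # presumably get_tokenizer() then raises if rendering is attempted (an
    # assumption about the BaseRenderer contract, which is not shown here).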

    def __init__(
        self,
        config: VllmConfig,
        tokenizer: MistralTokenizer | None,
    ) -> None:
        super().__init__(config, tokenizer)

        self._apply_chat_template_executor = ThreadPoolExecutor(max_workers=1)
        self._apply_chat_template_async = make_async(
            safe_apply_chat_template, executor=self._apply_chat_template_executor
        )
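
    # Design note: a single worker both keeps blocking template application off
    # the event loop and serializes calls into `mistral_common`. `make_async`
    # is assumed to behave roughly like this sketch (not the actual
    # vllm.utils.async_utils implementation):
    #
    #     import asyncio
    #     import functools
    #
    #     def make_async(fn, executor):
    #         async def _wrapper(*args, **kwargs):
    #             loop = asyncio.get_running_loop()
    #             return await loop.run_in_executor(
    #                 executor, functools.partial(fn, *args, **kwargs)
    #             )
    #         return _wrapper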

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = parse_chat_messages(
            messages,
            self.model_config,
            content_format="string",
        )

        prompt_raw = safe_apply_chat_template(
            tokenizer,
            messages,
            **params.get_apply_chat_template_kwargs(),
        )

        prompt = parse_dec_only_prompt(prompt_raw)
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt
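
    # Hypothetical usage (the message content and `params` are illustrative,
    # not taken from this diff):
    #
    #     renderer = MistralRenderer.from_config(vllm_config, tokenizer_kwargs)
    #     conversation, prompt = renderer.render_messages(
    #         [{"role": "user", "content": "Hello!"}],
    #         params,
    #     )
    #     # `prompt` is a DictPrompt; "multi_modal_data" / "multi_modal_uuids"
    #     # are attached only when the conversation carried them.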

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages,
            self.model_config,
            content_format="string",
        )

        prompt_raw = await self._apply_chat_template_async(
            tokenizer,
            messages,
            **params.get_apply_chat_template_kwargs(),
        )

        prompt = parse_dec_only_prompt(prompt_raw)
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt
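
    # The async path mirrors render_messages step for step; the only difference
    # is that template application is awaited on the dedicated executor thread.
    # Hypothetical usage from a coroutine:
    #
    #     conversation, prompt = await renderer.render_messages_async(
    #         [{"role": "user", "content": "Hello!"}],
    #         params,
    #     )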