update
vllm/renderers/mistral.py (Normal file, +133)
@@ -0,0 +1,133 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from concurrent.futures import ThreadPoolExecutor
from typing import Any

from vllm.config import VllmConfig
from vllm.entrypoints.chat_utils import (
    ChatCompletionMessageParam,
    ConversationMessage,
    parse_chat_messages,
    parse_chat_messages_async,
)
from vllm.logger import init_logger
from vllm.tokenizers import cached_get_tokenizer
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.utils.async_utils import make_async

from .base import BaseRenderer
from .inputs import DictPrompt
from .inputs.preprocess import parse_dec_only_prompt
from .params import ChatParams

logger = init_logger(__name__)


def safe_apply_chat_template(
    tokenizer: MistralTokenizer,
    messages: list[ChatCompletionMessageParam],
    **kwargs,
) -> str | list[int]:
    from mistral_common.exceptions import MistralCommonException

    try:
        return tokenizer.apply_chat_template(messages, **kwargs)
    # mistral-common uses assert statements to stop processing of input
    # if input does not comply with the expected format.
    # We convert those assertion errors to ValueErrors so they can be
    # properly caught in the preprocessing_input step
    except (AssertionError, MistralCommonException) as e:
        raise ValueError(str(e)) from e

    # External library exceptions can sometimes occur despite the framework's
    # internal exception management capabilities.
    except Exception as e:
        # Log and report any library-related exceptions for further
        # investigation.
        logger.exception(
            "An error occurred in `mistral_common` while applying chat template"
        )
        raise ValueError(str(e)) from e
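
# Illustrative only (`bad_messages` is hypothetical, not from this diff): the
# narrowing above means callers need to handle a single exception type, e.g.:
#
#     try:
#         safe_apply_chat_template(tokenizer, bad_messages)
#     except ValueError:
#         ...  # surfaced to the client as an invalid-input error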


class MistralRenderer(BaseRenderer[MistralTokenizer]):
    @classmethod
    def from_config(  # type: ignore[override]
        cls,
        config: VllmConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "MistralRenderer":
        model_config = config.model_config
        if model_config.skip_tokenizer_init:
            tokenizer = None
        else:
            tokenizer = cached_get_tokenizer(
                tokenizer_cls=MistralTokenizer,
                **tokenizer_kwargs,
            )

        return cls(config, tokenizer)
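
    # Note: with skip_tokenizer_init the renderer is built with tokenizer=None;
    # presumably get_tokenizer() then raises if rendering is attempted (an
    # assumption about the BaseRenderer contract, which is not shown here).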

    def __init__(
        self,
        config: VllmConfig,
        tokenizer: MistralTokenizer | None,
    ) -> None:
        super().__init__(config, tokenizer)

        self._apply_chat_template_executor = ThreadPoolExecutor(max_workers=1)
        self._apply_chat_template_async = make_async(
            safe_apply_chat_template, executor=self._apply_chat_template_executor
        )
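
    # Design note: a single worker both keeps blocking template application off
    # the event loop and serializes calls into `mistral_common`. `make_async`
    # is assumed to behave roughly like this sketch (not the actual
    # vllm.utils.async_utils implementation):
    #
    #     import asyncio
    #     import functools
    #
    #     def make_async(fn, executor):
    #         async def _wrapper(*args, **kwargs):
    #             loop = asyncio.get_running_loop()
    #             return await loop.run_in_executor(
    #                 executor, functools.partial(fn, *args, **kwargs)
    #             )
    #         return _wrapper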

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = parse_chat_messages(
            messages,
            self.model_config,
            content_format="string",
        )

        prompt_raw = safe_apply_chat_template(
            tokenizer,
            messages,
            **params.get_apply_chat_template_kwargs(),
        )

        prompt = parse_dec_only_prompt(prompt_raw)
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt
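
    # Hypothetical usage (the message content and `params` are illustrative,
    # not taken from this diff):
    #
    #     renderer = MistralRenderer.from_config(vllm_config, tokenizer_kwargs)
    #     conversation, prompt = renderer.render_messages(
    #         [{"role": "user", "content": "Hello!"}],
    #         params,
    #     )
    #     # `prompt` is a DictPrompt; "multi_modal_data" / "multi_modal_uuids"
    #     # are attached only when the conversation carried them.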

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages,
            self.model_config,
            content_format="string",
        )

        prompt_raw = await self._apply_chat_template_async(
            tokenizer,
            messages,
            **params.get_apply_chat_template_kwargs(),
        )

        prompt = parse_dec_only_prompt(prompt_raw)
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt
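
    # The async path mirrors render_messages step for step; the only difference
    # is that template application is awaited on the dedicated executor thread.
    # Hypothetical usage from a coroutine:
    #
    #     conversation, prompt = await renderer.render_messages_async(
    #         [{"role": "user", "content": "Hello!"}],
    #         params,
    #     )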