enginex-bi_150-vllm/vllm/renderers/deepseek_v32.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any

from vllm.config import VllmConfig
from vllm.entrypoints.chat_utils import (
    ChatCompletionMessageParam,
    ConversationMessage,
    parse_chat_messages,
    parse_chat_messages_async,
)
from vllm.logger import init_logger
from vllm.tokenizers import cached_get_tokenizer
from vllm.tokenizers.deepseek_v32 import DeepseekV32Tokenizer

from .base import BaseRenderer
from .inputs import DictPrompt
from .inputs.preprocess import parse_dec_only_prompt
from .params import ChatParams

logger = init_logger(__name__)


class DeepseekV32Renderer(BaseRenderer[DeepseekV32Tokenizer]):
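    """Prompt renderer for DeepSeek-V3.2 chat requests.

    Turns OpenAI-style chat messages into a decoder-only dict prompt using
    the model-specific ``DeepseekV32Tokenizer`` chat template.
    """
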
    @classmethod
    def from_config(  # type: ignore[override]
        cls,
        config: VllmConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "DeepseekV32Renderer":
        model_config = config.model_config
        if model_config.skip_tokenizer_init:
            tokenizer = None
        else:
            tokenizer = cached_get_tokenizer(
                tokenizer_cls=DeepseekV32Tokenizer,
                **tokenizer_kwargs,
            )
        return cls(config, tokenizer)

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
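        """Render chat messages into a decoder-only prompt.

        Returns the parsed conversation together with a dict prompt that
        carries any multimodal data and UUIDs found in the messages.
        """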
        tokenizer = self.get_tokenizer()
        # Parse OpenAI-style messages into a plain-text conversation plus
        # any multimodal payloads referenced by the messages.
        conversation, mm_data, mm_uuids = parse_chat_messages(
            messages,
            self.model_config,
            content_format="string",
        )
        prompt_raw = tokenizer.apply_chat_template(
            conversation=conversation,
            messages=messages,
            **params.get_apply_chat_template_kwargs(),
        )
        prompt = parse_dec_only_prompt(prompt_raw)
        # Attach multimodal inputs to the prompt only when present.
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids
        return conversation, prompt

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
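        """Async variant of ``render_messages``.

        Identical rendering logic, but message parsing is awaited via
        ``parse_chat_messages_async``.
        """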
        tokenizer = self.get_tokenizer()
        # Same flow as the sync path, but message parsing is awaited so
        # multimodal content can be loaded asynchronously.
        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages,
            self.model_config,
            content_format="string",
        )
        prompt_raw = tokenizer.apply_chat_template(
            conversation=conversation,
            messages=messages,
            **params.get_apply_chat_template_kwargs(),
        )
        prompt = parse_dec_only_prompt(prompt_raw)
        # Attach multimodal inputs to the prompt only when present.
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids
        return conversation, prompt
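
# Usage sketch (hypothetical; the exact VllmConfig plumbing, tokenizer
# kwargs, and ChatParams construction depend on the surrounding codebase):
#
#     renderer = DeepseekV32Renderer.from_config(vllm_config, tokenizer_kwargs)
#     conversation, prompt = renderer.render_messages(
#         [{"role": "user", "content": "Hello!"}],
#         chat_params,
#     )
#     # `prompt` is a DictPrompt ready for decoder-only generation.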