################################################################################
# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
from collections.abc import Mapping

from vllm.model_executor.models.qwen2_vl import (Qwen2VLDummyInputsBuilder,
                                                 Qwen2VLProcessingInfo)
from vllm.multimodal.parse import ImageSize


def get_image_size_with_most_features(self) -> ImageSize:
    """Return the image size reported by ``_get_vision_info`` for 240x240.

    Installed below as ``Qwen2VLProcessingInfo.get_image_size_with_most_features``.
    Per the original note, this method is used by both Qwen2_VL and
    Qwen2_5_VL, so it is patched in qwen2_vl.py.

    Returns:
        The first element of the pair returned by ``self._get_vision_info``
        when called with a 240x240 image and no image processor.
    """
    # NOTE(review): the fixed 240x240 request presumably keeps the reported
    # "largest" image small; confirm the intent with the patch author.
    largest_size, _ignored = self._get_vision_info(
        image_width=240,
        image_height=240,
        image_processor=None,
    )
    return largest_size


def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
    """Build dummy prompt text with one image token and no video tokens.

    Installed below as ``Qwen2VLDummyInputsBuilder.get_dummy_text``.

    Args:
        mm_counts: Accepted to match the method signature; it is not read,
            the image/video counts are fixed inside this function.

    Returns:
        The HF processor's image token repeated once, followed by its video
        token repeated zero times.
    """
    processor = self.info.get_hf_processor()
    image_placeholder = processor.image_token
    video_placeholder = processor.video_token

    # Fixed counts: exactly one image, no videos, whatever mm_counts says.
    image_count = 1
    video_count = 0
    return image_placeholder * image_count + video_placeholder * video_count


# Monkey-patch the vLLM Qwen2-VL classes with the functions defined above.
Qwen2VLProcessingInfo.get_image_size_with_most_features = (
    get_image_size_with_most_features)
Qwen2VLDummyInputsBuilder.get_dummy_text = get_dummy_text