################################################################################ # Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ################################################################################ # SPDX-License-Identifier: Apache-2.0 # Adapted from # https://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py # Copyright 2025 The vLLM team. # Copyright 2025 The Qwen Team. # Copyright 2025 The HuggingFace Inc. team. # All rights reserved. # # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX # and OPT implementations in this library. It has been modified from its # original forms to accommodate minor architectural differences compared # to GPT-NeoX and OPT used by the Meta AI team that trained the model. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Inference-only Qwen3-VL model compatible with HuggingFace weights.

Importing this module rebinds
``vllm.model_executor.models.utils._merge_multimodal_embeddings`` to
``_merge_multimodal_embeddings_fit`` (defined below) as an import-time
side effect.
"""
import torch
import vllm
from vllm.model_executor.models.utils import (_embedding_count_expression,
                                              _flatten_embeddings)
from vllm.multimodal import NestedTensors


def _merge_multimodal_embeddings_fit(
    inputs_embeds: torch.Tensor,
    is_multimodal: torch.Tensor,
    multimodal_embeddings: NestedTensors,
) -> torch.Tensor:
    """
    Merge ``multimodal_embeddings`` into ``inputs_embeds`` by overwriting the
    positions of ``inputs_embeds`` selected by ``is_multimodal``.

    Note:
        This updates ``inputs_embeds`` in place.

    Args:
        inputs_embeds: Destination embeddings; modified in place.
        is_multimodal: Mask used to index ``inputs_embeds``; the number of
            selected positions is taken to be ``is_multimodal.sum()``
            (presumably a boolean placeholder mask -- confirm with callers).
        multimodal_embeddings: Embeddings to write; flattened with
            ``_flatten_embeddings`` before assignment.

    Returns:
        The same ``inputs_embeds`` tensor that was passed in.

    Raises:
        ValueError: If the indexed assignment raises ``RuntimeError``. The
            message reports the token/placeholder counts when they differ,
            and a generic message otherwise.
    """
    flattened = _flatten_embeddings(multimodal_embeddings)

    try:
        # Indexed assignment is used here in place of ``masked_scatter_``.
        # The explicit cast matters: assignment through a mask requires the
        # source and destination dtypes to match, so without it a mere dtype
        # difference would surface as the generic ValueError below.
        inputs_embeds[is_multimodal] = flattened.to(dtype=inputs_embeds.dtype)
    except RuntimeError as e:
        # Only computed on the failure path; ``.item()`` forces a sync.
        num_expected_tokens = int(is_multimodal.sum().item())

        if flattened.shape[0] != num_expected_tokens:
            expr = _embedding_count_expression(multimodal_embeddings)
            raise ValueError(
                f"Attempted to assign {expr} = {flattened.shape[0]} "
                f"multimodal tokens to {num_expected_tokens} placeholders"
            ) from e

        raise ValueError("Error during masked scatter operation") from e

    return inputs_embeds


# Replace vLLM's helper with the variant defined above.
vllm.model_executor.models.utils._merge_multimodal_embeddings = (
    _merge_multimodal_embeddings_fit)