################################################################################
# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# SPDX-License-Identifier: Apache-2.0

from typing import Optional

import torch
# Adapted from transformers
from fastcore.basics import patch_to

import vllm
from vllm.model_executor.models.roberta import (
    create_position_ids_from_input_ids)


@patch_to(vllm.model_executor.models.roberta.RobertaClassificationHead)
def forward(self, features, **kwargs):
    """Pool the first token and project it through dense + tanh + out_proj.

    `features` is indexed as `features[0, :]`, i.e. the first token's hidden
    state (equivalent to [CLS]) is used as the sequence representation.
    """
    cls_state = features[0, :]                 # first token (equiv. to [CLS])
    hidden = self.dense(cls_state.unsqueeze(0))  # add batch dimension
    hidden = torch.tanh(hidden)
    logits = self.out_proj(hidden)
    return logits.squeeze(0)                   # drop the batch dimension again


@patch_to(vllm.model_executor.models.roberta.RobertaEmbedding)
def forward(
    self,
    input_ids: torch.Tensor,
    seq_lens: torch.Tensor,
    position_ids: torch.Tensor,
    token_type_ids: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Compute RoBERTa embeddings with positions re-derived per sequence.

    The incoming `position_ids` are discarded and recomputed, because RoBERTa
    position ids must start at padding_idx + 1 and skip existing padding
    tokens. References:
    - https://github.com/huggingface/transformers/blob/a3d69a8994d673899608a7c17fbf4f953f50474e/src/transformers/models/roberta/modeling_roberta.py#L133
    - https://github.com/huggingface/transformers/blob/a3d69a8994d673899608a7c17fbf4f953f50474e/src/transformers/models/roberta/modeling_roberta.py#L1669

    Returns the layer-normalized sum of word, token-type and position
    embeddings, with a leading batch dimension added for BR attention.
    """
    input_ids = input_ids.squeeze(0)  # input_ids arrives as a 2-dim tensor
    input_shape = input_ids.size()
    inputs_embeds = self.word_embeddings(input_ids)

    # Walk the flattened batch one sequence at a time, validating the incoming
    # positions and rebuilding RoBERTa-style position ids for each slice.
    corrected_pos = []
    offset = 0
    for seq_len in seq_lens:
        positions = position_ids[offset:offset + seq_len]
        tokens = input_ids[offset:offset + seq_len]
        offset += seq_len
        # Invariant check: incoming positions are always 0..N-1.
        expected = torch.arange(positions.size()[0],
                                dtype=torch.long,
                                device=inputs_embeds.device)
        assert torch.equal(positions, expected)
        corrected_pos.append(
            create_position_ids_from_input_ids(tokens, self.padding_idx))
    position_ids = torch.cat(corrected_pos)

    # Position embeddings.
    position_embeddings = self.position_embeddings(position_ids)

    if token_type_ids is None:
        token_type_ids = torch.zeros(input_shape,
                                     dtype=torch.long,
                                     device=inputs_embeds.device)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)

    embeddings = inputs_embeds + token_type_embeddings + position_embeddings
    embeddings = self.LayerNorm(embeddings)
    return embeddings.unsqueeze(0)  # add batch dimension for BR attention