################################################################################
# Copyright (c) 2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################

# SPDX-License-Identifier: Apache-2.0

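"""Monkey patches for vLLM's RoBERTa model implementation.

Importing this module applies two ``fastcore`` ``patch_to`` overrides:
``RobertaClassificationHead.forward`` gains an explicit batch dimension around
its projections, and ``RobertaEmbedding.forward`` rebuilds RoBERTa-style
position ids (starting at ``padding_idx + 1``, skipping padding tokens) for
each packed sequence before returning batched embeddings for the BR attention
path.
"""
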
from typing import Optional

import torch

# Adapted from transformers
from fastcore.basics import patch_to

import vllm
from vllm.model_executor.models.roberta import (
    create_position_ids_from_input_ids)


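# Classification head patch: pool the <s> (CLS-equivalent) token and run the
# classifier projections. The pooled feature is temporarily given a batch
# dimension around dense/out_proj, presumably because the BR backend expects
# batched 2-D inputs to its linear layers.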
@patch_to(vllm.model_executor.models.roberta.RobertaClassificationHead)
def forward(self, features, **kwargs):
    x = features[0, :]  # take <s> token (equiv. to [CLS])
    x = x.unsqueeze(0)  # add batch dimension
    x = self.dense(x)
    x = torch.tanh(x)
    x = self.out_proj(x)
    x = x.squeeze(0)  # remove batch dimension
    return x


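# Embedding patch: drop the extra batch dimension, rebuild RoBERTa position
# ids per sequence (seq_lens gives the token count of each packed sequence),
# sum word/position/token-type embeddings, and return them with the batch
# dimension restored for the BR attention path.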
@patch_to(vllm.model_executor.models.roberta.RobertaEmbedding)
def forward(
    self,
    input_ids: torch.Tensor,
    seq_lens: torch.Tensor,
    position_ids: torch.Tensor,
    token_type_ids: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    input_ids = input_ids.squeeze(0)  # input_ids arrives here as a 2-dim tensor
    input_shape = input_ids.size()
    inputs_embeds = self.word_embeddings(input_ids)

    # Replace position ids because in RoBERTa models
    # they have to start at padding_idx + 1 and ignore
    # existing padding tokens.
    # References:
    # - https://github.com/huggingface/transformers/blob/a3d69a8994d673899608a7c17fbf4f953f50474e/src/transformers/models/roberta/modeling_roberta.py#L133
    # - https://github.com/huggingface/transformers/blob/a3d69a8994d673899608a7c17fbf4f953f50474e/src/transformers/models/roberta/modeling_roberta.py#L1669
    pos_list = []
    token_list = []
    offset = 0
    for seq_len in seq_lens:
        pos_list.append(position_ids[offset:offset + seq_len])
        token_list.append(input_ids[offset:offset + seq_len])
        offset += seq_len

    new_pos_list = []
    for positions, tokens in zip(pos_list, token_list, strict=False):
        # Verify the assumption that incoming positions are
        # always a sequence from 0 to N.
        expected_pos = torch.arange(positions.size()[0],
                                    dtype=torch.long,
                                    device=inputs_embeds.device)
        assert torch.equal(positions, expected_pos)
        new_pos_list.append(
            create_position_ids_from_input_ids(tokens, self.padding_idx))
    position_ids = torch.cat(new_pos_list)

    # Position embeddings.
    position_embeddings = self.position_embeddings(position_ids)
    if token_type_ids is None:
        token_type_ids = torch.zeros(input_shape,
                                     dtype=torch.long,
                                     device=inputs_embeds.device)

    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    embeddings = inputs_embeds + token_type_embeddings + position_embeddings
    embeddings = self.LayerNorm(embeddings)
    return embeddings.unsqueeze(0)  # add batch dimension for BR attention
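

# A minimal sketch, not part of the patch itself, showing the per-sequence
# position-id rebuild that the patched RobertaEmbedding.forward relies on.
# padding_idx is assumed to be 1 here (the conventional RoBERTa padding id).
if __name__ == "__main__":
    toy_tokens = torch.tensor([5, 7, 9, 1, 1])  # last two tokens are padding
    # Non-padding tokens get positions starting at padding_idx + 1, while
    # padding tokens keep padding_idx, so this should print
    # tensor([2, 3, 4, 1, 1]).
    print(create_position_ids_from_input_ids(toy_tokens, 1))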