first commit

2026-03-10 13:31:25 +08:00
parent ba974cecfa
commit b62b889355
2604 changed files with 438977 additions and 0 deletions
--- a/vllm_br/model_executor/models/roberta.py
+++ b/vllm_br/model_executor/models/roberta.py
@@ -0,0 +1,89 @@
+################################################################################
+# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional
+
+import torch
+# Adapted from transformers
+from fastcore.basics import patch_to
+
+import vllm
+from vllm.model_executor.models.roberta import (
+    create_position_ids_from_input_ids)
+
+
+@patch_to(vllm.model_executor.models.roberta.RobertaClassificationHead)
+def forward(self, features, **kwargs):
+    """Run the classification head on row 0 of ``features`` (the <s> token)."""
+    # A leading batch axis of size one is kept while the head runs.
+    cls_state = features[0, :].unsqueeze(0)
+    activated = torch.tanh(self.dense(cls_state))
+    logits = self.out_proj(activated)
+    # Remove the batch axis again before handing the result back.
+    return logits.squeeze(0)
+
+
+@patch_to(vllm.model_executor.models.roberta.RobertaEmbedding)
+def forward(
+    self,
+    input_ids: torch.Tensor,
+    seq_lens: torch.Tensor,
+    position_ids: torch.Tensor,
+    token_type_ids: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """Embed packed sequences using RoBERTa-style position ids.
+
+    ``seq_lens`` holds the length of each sequence laid out back to back in
+    ``input_ids`` and ``position_ids``. Incoming positions are only checked
+    (each sequence must count up from 0); they are then replaced, because
+    RoBERTa positions start at ``padding_idx + 1`` and ignore padding tokens.
+
+    References:
+    - https://github.com/huggingface/transformers/blob/a3d69a8994d673899608a7c17fbf4f953f50474e/src/transformers/models/roberta/modeling_roberta.py#L133
+    - https://github.com/huggingface/transformers/blob/a3d69a8994d673899608a7c17fbf4f953f50474e/src/transformers/models/roberta/modeling_roberta.py#L1669
+    """
+    # input_ids is expected to be 2-dim here; squeeze away the leading axis.
+    flat_ids = input_ids.squeeze(0)
+    word_embeds = self.word_embeddings(flat_ids)
+    device = word_embeds.device
+
+    rebuilt = []
+    start = 0
+    for length in seq_lens:
+        stop = start + length
+        given = position_ids[start:stop]
+        # Verify the assumption that positions always run from 0 to N.
+        counting = torch.arange(given.size()[0],
+                                dtype=torch.long,
+                                device=device)
+        assert torch.equal(given, counting)
+        rebuilt.append(
+            create_position_ids_from_input_ids(flat_ids[start:stop],
+                                               self.padding_idx))
+        start = stop
+    position_embeds = self.position_embeddings(torch.cat(rebuilt))
+
+    if token_type_ids is None:
+        token_type_ids = torch.zeros(flat_ids.size(),
+                                     dtype=torch.long,
+                                     device=device)
+    type_embeds = self.token_type_embeddings(token_type_ids)
+
+    summed = word_embeds + type_embeds + position_embeds
+    # The leading batch axis is added back for BR attention.
+    return self.LayerNorm(summed).unsqueeze(0)