first commit

2026-03-10 13:31:25 +08:00
parent ba974cecfa
commit b62b889355
2604 changed files with 438977 additions and 0 deletions
--- a/vllm_br/model_executor/models/clip.py
+++ b/vllm_br/model_executor/models/clip.py
@@ -0,0 +1,65 @@
+################################################################################
+# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# SPDX-License-Identifier: Apache-2.0
+"""Minimal implementation of CLIPVisionModel intended to be only used
+within a vision language model."""
+
+import torch
+import torch_br
+
+from vllm.model_executor.models.clip import CLIPVisionEmbeddings
+
+
+def clip_vision_embeddings_forward(self,
+                                   pixel_values: torch.Tensor) -> torch.Tensor:
+    """Compute CLIP vision embeddings for a batch of images.
+
+    Replacement for ``CLIPVisionEmbeddings.forward``. When ``patch_size`` is
+    14 the patch projection is done by the ``torch_br`` conv2d kernel using
+    ``self.patch_embedding.weight``; any other patch size goes through
+    ``self.patch_embedding`` itself.
+
+    Args:
+        pixel_values: Image batch; the first dimension is the batch size.
+            It is cast to the dtype of ``self.patch_embedding.weight``.
+
+    Returns:
+        The class embedding concatenated in front of the flattened patch
+        embeddings along dim 1, with the position embeddings added.
+    """
+    batch_size = pixel_values.shape[0]
+    target_dtype = self.patch_embedding.weight.dtype
+    pixel_values = pixel_values.to(dtype=target_dtype)
+    if self.patch_size == 14:
+        import torch_br.supa._debug as supa_debug
+
+        zero_switches = (
+            supa_debug.set_disable_zero_ws,
+            supa_debug.set_disable_zero_output_uma,
+            supa_debug.set_disable_zero_output_numa,
+            supa_debug.set_disable_reorder_zero,
+        )
+        # The switches are process-wide: set them to False only around the
+        # kernel call and set them back to True even if the kernel raises.
+        # NOTE(review): True is assumed to be the desired state afterwards;
+        # the previous values are not queried.
+        for switch in zero_switches:
+            switch(False)
+        try:
+            # TODO(shouqing): this op needs to do the clear_zeros operation
+            # internally.
+            patch_embeds = torch_br.supa_conv2d_knxn_snxn_p0x0_fwd(
+                pixel_values, self.patch_embedding.weight, self.patch_size,
+                self.patch_size, 0)
+        finally:
+            for switch in zero_switches:
+                switch(True)
+    else:
+        # shape = [*, width, grid, grid]
+        patch_embeds = self.patch_embedding(pixel_values)
+    patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
+
+    class_embeds = self.class_embedding.expand(batch_size, 1, -1)
+    embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
+
+    # The position ids index the position embedding table, so they must live
+    # on the same device as that table. Cache the moved tensor on the module
+    # so the transfer happens at most once.
+    target_device = self.position_embedding.weight.device
+    if self.position_ids.device != target_device:
+        self.position_ids = self.position_ids.to(target_device)
+    embeddings = embeddings + self.position_embedding(self.position_ids)
+
+    return embeddings
+
+
+# Monkey-patch: importing this module replaces the upstream vLLM
+# CLIPVisionEmbeddings.forward with the implementation defined in this file.
+#logger.debug('[Patch] patch CLIPVisionEmbeddings forward')
+CLIPVisionEmbeddings.forward = clip_vision_embeddings_forward