first commit

2026-03-10 13:31:25 +08:00
parent ba974cecfa
commit b62b889355
2604 changed files with 438977 additions and 0 deletions
--- a/vllm_br/v1/kv_cache_interface.py
+++ b/vllm_br/v1/kv_cache_interface.py
@@ -0,0 +1,25 @@
+################################################################################
+# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# TODO(ychun): the page_size_bytes override below is temporarily commented out.
+# @property  # type: ignore
+# def AttentionSpec_page_size_bytes(self) -> int:
+#     # For MLA we only store a single latent vector; BR166 uses BB, so the page size needs to be multiplied by 2.
+#     coef = 1 if (self.use_mla and envs.VLLM_BR_DEVICE_SPC_NUM <= 16) else 2
+#     return coef * self.block_size * self.num_kv_heads * self.head_size \
+#             * get_dtype_size(self.dtype)
+
+# AttentionSpec.page_size_bytes = AttentionSpec_page_size_bytes