[Patch] Remove the patch of MiniCPM (#5975)

### What this PR does / why we need it?

Part of #5304. Now that https://github.com/vllm-project/vllm/pull/32523 has been merged, we can remove the patch of `MiniCPMAttention`.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

Tested locally.

- vLLM version: v0.13.0
- vLLM main: 2c24bc6996

---------

Signed-off-by: gcanlin <canlinguosdu@gmail.com>
2026-02-09 14:07:44 +08:00
parent e5f0e0eaf7
commit b7aa511daa
4 changed files with 0 additions and 128 deletions
--- a/vllm_ascend/patch/init.py
+++ b/vllm_ascend/patch/init.py
@@ -112,20 +112,6 @@
 #       Remove this patch when the refactor of all2all manager is done.
 #       Remove this patch when vLLM support all_reduce as customop.
 #
-# ** 2. File: worker/patch_minicpm.py **
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#   1. `vllm.model_executor.models.minicpm.MiniCPMAttention.forward`
-#    Why:
-#       The forward func of MiniCPMAttention in vllm do a datatype convert
-#       (original datatype --> float32) to ensure the precision on cuda.
-#       However float32 is not supported in cann rope op, thus we keep this patch
-#    How：
-#       Removed the dtype convert operations in forward
-#    Related PR (if no, explain why):
-#       NO, only for npu due to rope op.
-#    Future Plan:
-#       Keep this patch in vllm-ascend.
-#
 # ** 3. File: worker/patch_multimodal_merge.py**
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.model_executor.models.utils._merge_multimodal_embeddings`
--- a/vllm_ascend/patch/worker/init.py
+++ b/vllm_ascend/patch/worker/init.py
@@ -26,7 +26,6 @@ import vllm_ascend.patch.worker.patch_unquantized_gemm  # noqa
 import vllm_ascend.patch.worker.patch_bert  # noqa
 import vllm_ascend.patch.worker.patch_distributed  # noqa
 import vllm_ascend.patch.worker.patch_multimodal_merge  # noqa
-import vllm_ascend.patch.worker.patch_minicpm  # noqa
 import vllm_ascend.patch.worker.patch_rope  # noqa
 import vllm_ascend.patch.worker.patch_qwen3_next  # noqa
 import vllm_ascend.patch.worker.patch_qwen3_next_mtp  # noqa
--- a/vllm_ascend/patch/worker/patch_minicpm.py
+++ b/vllm_ascend/patch/worker/patch_minicpm.py
@@ -1,36 +0,0 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# This file is a part of the vllm-ascend project.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import torch
-from vllm.model_executor.models.minicpm import MiniCPMAttention
-
-
-def forward(
-    self,
-    positions: torch.Tensor,
-    hidden_states: torch.Tensor,
-) -> torch.Tensor:
-    qkv, _ = self.qkv_proj(hidden_states)
-    q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
-    q, k = self.rotary_emb(positions, q, k)
-    attn_output = self.attn(q, k, v)
-    output, _ = self.o_proj(attn_output)
-    return output
-
-
-# The type conversion in the forward function is deleted to support the rope operator.
-MiniCPMAttention.forward = forward