From b7aa511daa57da2f8198ea2955113ceca428f7dc Mon Sep 17 00:00:00 2001 From: Canlin Guo Date: Mon, 9 Feb 2026 14:07:44 +0800 Subject: [PATCH] [Patch] Remove the patch of MiniCPM (#5975) ### What this PR does / why we need it? Part of #5304. Now that https://github.com/vllm-project/vllm/pull/32523 has been merged, we can remove the patch of `MiniCPMAttention`. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Tested locally. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060 --------- Signed-off-by: gcanlin --- .../worker/patch_common/test_patch_minicpm.py | 77 ------------------- vllm_ascend/patch/__init__.py | 14 ---- vllm_ascend/patch/worker/__init__.py | 1 - vllm_ascend/patch/worker/patch_minicpm.py | 36 --------- 4 files changed, 128 deletions(-) delete mode 100644 tests/ut/patch/worker/patch_common/test_patch_minicpm.py delete mode 100644 vllm_ascend/patch/worker/patch_minicpm.py diff --git a/tests/ut/patch/worker/patch_common/test_patch_minicpm.py b/tests/ut/patch/worker/patch_common/test_patch_minicpm.py deleted file mode 100644 index 9a63d0ea..00000000 --- a/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file is a part of the vllm-ascend project. 
-# - -from unittest.mock import MagicMock - -import torch - -from tests.ut.base import TestBase -from vllm_ascend.patch.worker.patch_minicpm import forward - - -class TestPatchMiniCPM(TestBase): - - def setUp(self): - self.mock_self = MagicMock() - - self.mock_self.q_size = 128 - self.mock_self.kv_size = 128 - - self.mock_self.qkv_proj = MagicMock() - self.mock_self.rotary_emb = MagicMock() - self.mock_self.attn = MagicMock() - self.mock_self.o_proj = MagicMock() - - self.positions = torch.tensor([1, 2, 3]) - self.hidden_states = torch.randn(3, 256) - - self.mock_qkv = torch.randn(3, 384) - self.mock_q = self.mock_qkv[:, :128] - self.mock_k = self.mock_qkv[:, 128:256] - self.mock_v = self.mock_qkv[:, 256:] - - self.mock_self.qkv_proj.return_value = (self.mock_qkv, None) - self.mock_self.rotary_emb.return_value = (self.mock_q, self.mock_k) - self.mock_self.attn.return_value = torch.randn(3, 256) - self.mock_self.o_proj.return_value = (torch.randn(3, 256), None) - - def test_forward_patched(self): - from vllm.model_executor.models.minicpm import MiniCPMAttention - - self.assertIs(MiniCPMAttention.forward, forward) - - def test_forward_function(self): - result = forward(self.mock_self, self.positions, self.hidden_states) - - self.mock_self.qkv_proj.assert_called_once_with(self.hidden_states) - - args, _ = self.mock_self.rotary_emb.call_args - self.assertEqual(len(args), 3) - self.assertTrue(torch.equal(args[0], self.positions)) - self.assertTrue(torch.equal(args[1], self.mock_q)) - self.assertTrue(torch.equal(args[2], self.mock_k)) - - args, _ = self.mock_self.attn.call_args - self.assertEqual(len(args), 3) - self.assertTrue(torch.equal(args[0], self.mock_q)) - self.assertTrue(torch.equal(args[1], self.mock_k)) - self.assertTrue(torch.equal(args[2], self.mock_v)) - - self.mock_self.o_proj.assert_called_once_with( - self.mock_self.attn.return_value) - - self.assertEqual(result.shape, (3, 256)) - self.assertTrue( - torch.equal(result, 
self.mock_self.o_proj.return_value[0])) diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index b7010b73..a5a32dd1 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -112,20 +112,6 @@ # Remove this patch when the refactor of all2all manager is done. # Remove this patch when vLLM support all_reduce as customop. # -# ** 2. File: worker/patch_minicpm.py ** -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# 1. `vllm.model_executor.models.minicpm.MiniCPMAttention.forward` -# Why: -# The forward func of MiniCPMAttention in vllm do a datatype convert -# (original datatype --> float32) to ensure the precision on cuda. -# However float32 is not supported in cann rope op, thus we keep this patch -# How: -# Removed the dtype convert operations in forward -# Related PR (if no, explain why): -# NO, only for npu due to rope op. -# Future Plan: -# Keep this patch in vllm-ascend. -# # ** 3. File: worker/patch_multimodal_merge.py** # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 1. 
`vllm.model_executor.models.utils._merge_multimodal_embeddings` diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py index 2fd0498f..1eac4d0c 100644 --- a/vllm_ascend/patch/worker/__init__.py +++ b/vllm_ascend/patch/worker/__init__.py @@ -26,7 +26,6 @@ import vllm_ascend.patch.worker.patch_unquantized_gemm # noqa import vllm_ascend.patch.worker.patch_bert # noqa import vllm_ascend.patch.worker.patch_distributed # noqa import vllm_ascend.patch.worker.patch_multimodal_merge # noqa -import vllm_ascend.patch.worker.patch_minicpm # noqa import vllm_ascend.patch.worker.patch_rope # noqa import vllm_ascend.patch.worker.patch_qwen3_next # noqa import vllm_ascend.patch.worker.patch_qwen3_next_mtp # noqa diff --git a/vllm_ascend/patch/worker/patch_minicpm.py b/vllm_ascend/patch/worker/patch_minicpm.py deleted file mode 100644 index 663a08a5..00000000 --- a/vllm_ascend/patch/worker/patch_minicpm.py +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. -# This file is a part of the vllm-ascend project. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import torch -from vllm.model_executor.models.minicpm import MiniCPMAttention - - -def forward( - self, - positions: torch.Tensor, - hidden_states: torch.Tensor, -) -> torch.Tensor: - qkv, _ = self.qkv_proj(hidden_states) - q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) - q, k = self.rotary_emb(positions, q, k) - attn_output = self.attn(q, k, v) - output, _ = self.o_proj(attn_output) - return output - - -# The type conversion in the forward function is deleted to support the rope operator. -MiniCPMAttention.forward = forward