From c2fdd4b8bc9ce859343909caebf331e0cd047908 Mon Sep 17 00:00:00 2001 From: Mengqing Cao Date: Thu, 11 Sep 2025 11:30:16 +0800 Subject: [PATCH] [CI/UT] Fix UTs on register customop and warm up model (#2862) ### What this PR does / why we need it? Fix UTs on register customop and warm up model ### How was this patch tested? CI passed with existing test. Co-authored-by: Icey <1790571317@qq.com> - vLLM version: main - vLLM main: https://github.com/vllm-project/vllm/commit/cc99baf14dacc2497d0c5ed84e076ef2c37f6a4d --------- Signed-off-by: MengqingCao --- tests/ut/test_utils.py | 9 +++--- tests/ut/worker/test_worker_v1.py | 17 +++++----- vllm_ascend/utils.py | 53 ++++++++++++++----------------- 3 files changed, 36 insertions(+), 43 deletions(-) diff --git a/tests/ut/test_utils.py b/tests/ut/test_utils.py index 508bb2a..f9284f9 100644 --- a/tests/ut/test_utils.py +++ b/tests/ut/test_utils.py @@ -24,6 +24,7 @@ from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig, from tests.ut.base import TestBase from vllm_ascend import utils +from vllm_ascend.utils import REGISTERED_ASCEND_OPS class TestUtils(TestBase): @@ -302,14 +303,14 @@ class TestUtils(TestBase): # ascend custom op is not registered utils.register_ascend_customop() - # should call register_oot three - self.assertEqual(mock_customop.register_oot.call_count, 13) + self.assertEqual(mock_customop.register_oot.call_count, + len(REGISTERED_ASCEND_OPS)) self.assertTrue(utils._ASCEND_CUSTOMOP_IS_REIGISTERED) # ascend custom op is already registered utils.register_ascend_customop() - # should not register_oot again, thus only called three in this ut - self.assertEqual(mock_customop.register_oot.call_count, 13) + self.assertEqual(mock_customop.register_oot.call_count, + len(REGISTERED_ASCEND_OPS)) class TestProfileExecuteDuration(TestBase): diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py index 7292a29..dc3edaa 100644 --- a/tests/ut/worker/test_worker_v1.py +++ b/tests/ut/worker/test_worker_v1.py @@ -1009,9 +1009,8 @@ class TestNPUWorker(TestBase): @patch("vllm_ascend.worker.worker_v1.NPUPlatform.seed_everything") @patch("vllm_ascend.worker.worker_v1.logger") - @patch("torch_npu._npu_matmul_add_fp32") - def test_compile_or_warm_up_model_with_eager_mode(self, - mock_npu_matmul_add, + @patch("vllm_ascend.worker.worker_v1.NPUWorker._warm_up_atb") + def test_compile_or_warm_up_model_with_eager_mode(self, mock_warm_up_atb, mock_logger, mock_seed_everything): """Test compile_or_warm_up_model method - eager mode""" @@ -1054,14 +1053,14 @@ class TestNPUWorker(TestBase): # Verify seed setting mock_seed_everything.assert_called_once_with(12345) - # Verify calls - mock_npu_matmul_add.assert_called_once() + # Verify atb warm up + mock_warm_up_atb.assert_called_once() @patch("vllm_ascend.worker.worker_v1.NPUPlatform.seed_everything") @patch("vllm_ascend.worker.worker_v1.logger") - @patch("torch_npu._npu_matmul_add_fp32") + @patch("vllm_ascend.worker.worker_v1.NPUWorker._warm_up_atb") def test_compile_or_warm_up_model_with_graph_capture( - self, mock_npu_matmul_add, mock_logger, mock_seed_everything): + self, mock_warm_up_atb, mock_logger, mock_seed_everything): """Test compile_or_warm_up_model method - with graph capture enabled""" from vllm_ascend.worker.worker_v1 import NPUWorker @@ -1094,8 +1093,8 @@ class TestNPUWorker(TestBase): # Verify seed setting mock_seed_everything.assert_called_once_with(67890) - # Verify calls - mock_npu_matmul_add.assert_called_once() + # Verify atb warm up + mock_warm_up_atb.assert_called_once() @patch("vllm_ascend.worker.worker_v1.CaMemAllocator") def test_initialize_from_config_with_sleep_mode(self, diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index 33e1e01..cd014d0 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -50,6 +50,7 @@ MAX_CAPTURE_SIZE = 1800 ASCEND_QUANTIZATION_METHOD = "ascend" SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"] +REGISTERED_ASCEND_OPS = {} ACL_FORMAT_FRACTAL_ND = 2 ACL_FORMAT_FRACTAL_NZ = 29 @@ -493,7 +494,10 @@ def register_ascend_customop(): return from vllm.model_executor.custom_op import CustomOp + from vllm_ascend.models.layers.mla import AscendMultiHeadLatentAttention from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul + from vllm_ascend.ops.common_fused_moe import AscendFusedMoE + from vllm_ascend.ops.layernorm import AscendRMSNorm from vllm_ascend.ops.linear import (AscendColumnParallelLinear, AscendMergedColumnParallelLinear, AscendQKVParallelLinear, @@ -503,38 +507,27 @@ def register_ascend_customop(): from vllm_ascend.ops.vocab_parallel_embedding import ( AscendLogitsProcessor, AscendParallelLMHead, AscendVocabParallelEmbedding) - CustomOp.register_oot(_decorated_op_cls=AscendQuickGELU, name="QuickGELU") - CustomOp.register_oot(_decorated_op_cls=AscendSiluAndMul, - name="SiluAndMul") - CustomOp.register_oot(_decorated_op_cls=AscendRotaryEmbedding, - name="RotaryEmbedding") - CustomOp.register_oot(_decorated_op_cls=AscendColumnParallelLinear, - name="ColumnParallelLinear") - CustomOp.register_oot(_decorated_op_cls=AscendRowParallelLinear, - name="RowParallelLinear") - CustomOp.register_oot(_decorated_op_cls=AscendMergedColumnParallelLinear, - name="MergedColumnParallelLinear") - CustomOp.register_oot(_decorated_op_cls=AscendQKVParallelLinear, - name="QKVParallelLinear") - CustomOp.register_oot( - _decorated_op_cls=AscendDeepseekScalingRotaryEmbedding, - name="DeepseekScalingRotaryEmbedding") - CustomOp.register_oot(_decorated_op_cls=AscendVocabParallelEmbedding, - name="VocabParallelEmbedding") - CustomOp.register_oot(_decorated_op_cls=AscendParallelLMHead, - name="ParallelLMHead") - CustomOp.register_oot(_decorated_op_cls=AscendLogitsProcessor, - name="LogitsProcessor") - from vllm_ascend.ops.layernorm import AscendRMSNorm - CustomOp.register_oot(_decorated_op_cls=AscendRMSNorm, name="RMSNorm") + global REGISTERED_ASCEND_OPS + REGISTERED_ASCEND_OPS = { + "QuickGELU": AscendQuickGELU, + "SiluAndMul": AscendSiluAndMul, + "RotaryEmbedding": AscendRotaryEmbedding, + "ColumnParallelLinear": AscendColumnParallelLinear, + "RowParallelLinear": AscendRowParallelLinear, + "MergedColumnParallelLinear": AscendMergedColumnParallelLinear, + "QKVParallelLinear": AscendQKVParallelLinear, + "DeepseekScalingRotaryEmbedding": AscendDeepseekScalingRotaryEmbedding, + "VocabParallelEmbedding": AscendVocabParallelEmbedding, + "ParallelLMHead": AscendParallelLMHead, + "LogitsProcessor": AscendLogitsProcessor, + "RMSNorm": AscendRMSNorm, + "FusedMoE": AscendFusedMoE, + "MultiHeadLatentAttention": AscendMultiHeadLatentAttention, + } - from vllm_ascend.ops.common_fused_moe import AscendFusedMoE - CustomOp.register_oot(_decorated_op_cls=AscendFusedMoE, name="FusedMoE") - - from vllm_ascend.models.layers.mla import AscendMultiHeadLatentAttention - CustomOp.register_oot(_decorated_op_cls=AscendMultiHeadLatentAttention, - name="MultiHeadLatentAttention") + for name, op_cls in REGISTERED_ASCEND_OPS.items(): + CustomOp.register_oot(_decorated_op_cls=op_cls, name=name) # NOTE: Keep this at last to ensure all custom actions are registered _ASCEND_CUSTOMOP_IS_REIGISTERED = True