[CI/UT] Fix UTs on register customop and warm up model (#2862)

### What this PR does / why we need it?
Fix UTs on register customop and warm up model

### How was this patch tested?
CI passed with the existing tests.

Co-authored-by: Icey <1790571317@qq.com>

- vLLM version: main
- vLLM main:
cc99baf14d

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
Mengqing Cao
2025-09-11 11:30:16 +08:00
committed by GitHub
parent b7df04de9b
commit c2fdd4b8bc
3 changed files with 36 additions and 43 deletions

View File

@@ -24,6 +24,7 @@ from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig,
from tests.ut.base import TestBase from tests.ut.base import TestBase
from vllm_ascend import utils from vllm_ascend import utils
from vllm_ascend.utils import REGISTERED_ASCEND_OPS
class TestUtils(TestBase): class TestUtils(TestBase):
@@ -302,14 +303,14 @@ class TestUtils(TestBase):
# ascend custom op is not registered # ascend custom op is not registered
utils.register_ascend_customop() utils.register_ascend_customop()
# should call register_oot three self.assertEqual(mock_customop.register_oot.call_count,
self.assertEqual(mock_customop.register_oot.call_count, 13) len(REGISTERED_ASCEND_OPS))
self.assertTrue(utils._ASCEND_CUSTOMOP_IS_REIGISTERED) self.assertTrue(utils._ASCEND_CUSTOMOP_IS_REIGISTERED)
# ascend custom op is already registered # ascend custom op is already registered
utils.register_ascend_customop() utils.register_ascend_customop()
# should not register_oot again, thus only called three in this ut self.assertEqual(mock_customop.register_oot.call_count,
self.assertEqual(mock_customop.register_oot.call_count, 13) len(REGISTERED_ASCEND_OPS))
class TestProfileExecuteDuration(TestBase): class TestProfileExecuteDuration(TestBase):

View File

@@ -1009,9 +1009,8 @@ class TestNPUWorker(TestBase):
@patch("vllm_ascend.worker.worker_v1.NPUPlatform.seed_everything") @patch("vllm_ascend.worker.worker_v1.NPUPlatform.seed_everything")
@patch("vllm_ascend.worker.worker_v1.logger") @patch("vllm_ascend.worker.worker_v1.logger")
@patch("torch_npu._npu_matmul_add_fp32") @patch("vllm_ascend.worker.worker_v1.NPUWorker._warm_up_atb")
def test_compile_or_warm_up_model_with_eager_mode(self, def test_compile_or_warm_up_model_with_eager_mode(self, mock_warm_up_atb,
mock_npu_matmul_add,
mock_logger, mock_logger,
mock_seed_everything): mock_seed_everything):
"""Test compile_or_warm_up_model method - eager mode""" """Test compile_or_warm_up_model method - eager mode"""
@@ -1054,14 +1053,14 @@ class TestNPUWorker(TestBase):
# Verify seed setting # Verify seed setting
mock_seed_everything.assert_called_once_with(12345) mock_seed_everything.assert_called_once_with(12345)
# Verify calls # Verify atb warm up
mock_npu_matmul_add.assert_called_once() mock_warm_up_atb.assert_called_once()
@patch("vllm_ascend.worker.worker_v1.NPUPlatform.seed_everything") @patch("vllm_ascend.worker.worker_v1.NPUPlatform.seed_everything")
@patch("vllm_ascend.worker.worker_v1.logger") @patch("vllm_ascend.worker.worker_v1.logger")
@patch("torch_npu._npu_matmul_add_fp32") @patch("vllm_ascend.worker.worker_v1.NPUWorker._warm_up_atb")
def test_compile_or_warm_up_model_with_graph_capture( def test_compile_or_warm_up_model_with_graph_capture(
self, mock_npu_matmul_add, mock_logger, mock_seed_everything): self, mock_warm_up_atb, mock_logger, mock_seed_everything):
"""Test compile_or_warm_up_model method - with graph capture enabled""" """Test compile_or_warm_up_model method - with graph capture enabled"""
from vllm_ascend.worker.worker_v1 import NPUWorker from vllm_ascend.worker.worker_v1 import NPUWorker
@@ -1094,8 +1093,8 @@ class TestNPUWorker(TestBase):
# Verify seed setting # Verify seed setting
mock_seed_everything.assert_called_once_with(67890) mock_seed_everything.assert_called_once_with(67890)
# Verify calls # Verify atb warm up
mock_npu_matmul_add.assert_called_once() mock_warm_up_atb.assert_called_once()
@patch("vllm_ascend.worker.worker_v1.CaMemAllocator") @patch("vllm_ascend.worker.worker_v1.CaMemAllocator")
def test_initialize_from_config_with_sleep_mode(self, def test_initialize_from_config_with_sleep_mode(self,

View File

@@ -50,6 +50,7 @@ MAX_CAPTURE_SIZE = 1800
ASCEND_QUANTIZATION_METHOD = "ascend" ASCEND_QUANTIZATION_METHOD = "ascend"
SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"] SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"]
REGISTERED_ASCEND_OPS = {}
ACL_FORMAT_FRACTAL_ND = 2 ACL_FORMAT_FRACTAL_ND = 2
ACL_FORMAT_FRACTAL_NZ = 29 ACL_FORMAT_FRACTAL_NZ = 29
@@ -493,7 +494,10 @@ def register_ascend_customop():
return return
from vllm.model_executor.custom_op import CustomOp from vllm.model_executor.custom_op import CustomOp
from vllm_ascend.models.layers.mla import AscendMultiHeadLatentAttention
from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
from vllm_ascend.ops.common_fused_moe import AscendFusedMoE
from vllm_ascend.ops.layernorm import AscendRMSNorm
from vllm_ascend.ops.linear import (AscendColumnParallelLinear, from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
AscendMergedColumnParallelLinear, AscendMergedColumnParallelLinear,
AscendQKVParallelLinear, AscendQKVParallelLinear,
@@ -503,38 +507,27 @@ def register_ascend_customop():
from vllm_ascend.ops.vocab_parallel_embedding import ( from vllm_ascend.ops.vocab_parallel_embedding import (
AscendLogitsProcessor, AscendParallelLMHead, AscendLogitsProcessor, AscendParallelLMHead,
AscendVocabParallelEmbedding) AscendVocabParallelEmbedding)
CustomOp.register_oot(_decorated_op_cls=AscendQuickGELU, name="QuickGELU")
CustomOp.register_oot(_decorated_op_cls=AscendSiluAndMul,
name="SiluAndMul")
CustomOp.register_oot(_decorated_op_cls=AscendRotaryEmbedding,
name="RotaryEmbedding")
CustomOp.register_oot(_decorated_op_cls=AscendColumnParallelLinear,
name="ColumnParallelLinear")
CustomOp.register_oot(_decorated_op_cls=AscendRowParallelLinear,
name="RowParallelLinear")
CustomOp.register_oot(_decorated_op_cls=AscendMergedColumnParallelLinear,
name="MergedColumnParallelLinear")
CustomOp.register_oot(_decorated_op_cls=AscendQKVParallelLinear,
name="QKVParallelLinear")
CustomOp.register_oot(
_decorated_op_cls=AscendDeepseekScalingRotaryEmbedding,
name="DeepseekScalingRotaryEmbedding")
CustomOp.register_oot(_decorated_op_cls=AscendVocabParallelEmbedding,
name="VocabParallelEmbedding")
CustomOp.register_oot(_decorated_op_cls=AscendParallelLMHead,
name="ParallelLMHead")
CustomOp.register_oot(_decorated_op_cls=AscendLogitsProcessor,
name="LogitsProcessor")
from vllm_ascend.ops.layernorm import AscendRMSNorm global REGISTERED_ASCEND_OPS
CustomOp.register_oot(_decorated_op_cls=AscendRMSNorm, name="RMSNorm") REGISTERED_ASCEND_OPS = {
"QuickGELU": AscendQuickGELU,
"SiluAndMul": AscendSiluAndMul,
"RotaryEmbedding": AscendRotaryEmbedding,
"ColumnParallelLinear": AscendColumnParallelLinear,
"RowParallelLinear": AscendRowParallelLinear,
"MergedColumnParallelLinear": AscendMergedColumnParallelLinear,
"QKVParallelLinear": AscendQKVParallelLinear,
"DeepseekScalingRotaryEmbedding": AscendDeepseekScalingRotaryEmbedding,
"VocabParallelEmbedding": AscendVocabParallelEmbedding,
"ParallelLMHead": AscendParallelLMHead,
"LogitsProcessor": AscendLogitsProcessor,
"RMSNorm": AscendRMSNorm,
"FusedMoE": AscendFusedMoE,
"MultiHeadLatentAttention": AscendMultiHeadLatentAttention,
}
from vllm_ascend.ops.common_fused_moe import AscendFusedMoE for name, op_cls in REGISTERED_ASCEND_OPS.items():
CustomOp.register_oot(_decorated_op_cls=AscendFusedMoE, name="FusedMoE") CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)
from vllm_ascend.models.layers.mla import AscendMultiHeadLatentAttention
CustomOp.register_oot(_decorated_op_cls=AscendMultiHeadLatentAttention,
name="MultiHeadLatentAttention")
# NOTE: Keep this at last to ensure all custom actions are registered # NOTE: Keep this at last to ensure all custom actions are registered
_ASCEND_CUSTOMOP_IS_REIGISTERED = True _ASCEND_CUSTOMOP_IS_REIGISTERED = True