[CI] Update UT CANN version to 8.5.0 for main branch (#6564)

### What this PR does / why we need it?
Update UT CANN version to 8.5.0

### Does this PR introduce _any_ user-facing change?
NA


- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0

---------

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
Nengjun Ma
2026-02-06 10:28:42 +08:00
committed by GitHub
parent 81f3c09d6d
commit 11339eb48a
8 changed files with 47 additions and 3 deletions

View File

@@ -27,6 +27,7 @@ jobs:
VLLM_USE_MODELSCOPE: True VLLM_USE_MODELSCOPE: True
SOC_VERSION: ascend910b1 SOC_VERSION: ascend910b1
MAX_JOBS: 4 MAX_JOBS: 4
COMPILE_CUSTOM_KERNELS: 0
steps: steps:
- name: Install packages - name: Install packages
run: | run: |

View File

@@ -92,7 +92,7 @@ jobs:
with: with:
vllm: ${{ matrix.vllm_version }} vllm: ${{ matrix.vllm_version }}
runner: linux-amd64-cpu-8-hk runner: linux-amd64-cpu-8-hk
image: quay.nju.edu.cn/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11 image: quay.nju.edu.cn/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11
type: pr type: pr
e2e-light: e2e-light:

View File

@@ -334,6 +334,8 @@ class cmake_build_ext(build_ext):
) )
def build_extensions(self) -> None: def build_extensions(self) -> None:
if not envs.COMPILE_CUSTOM_KERNELS:
return
# Ensure that CMake is present and working # Ensure that CMake is present and working
try: try:
subprocess.check_output(["cmake", "--version"]) subprocess.check_output(["cmake", "--version"])
@@ -423,6 +425,8 @@ except LookupError:
# only checks out the commit. In this case, we set a dummy version. # only checks out the commit. In this case, we set a dummy version.
VERSION = "0.0.0" VERSION = "0.0.0"
ext_modules = []
if envs.COMPILE_CUSTOM_KERNELS:
ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")] ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]

View File

@@ -39,7 +39,8 @@ def default_vllm_config():
with set_current_vllm_config(mock_config): with set_current_vllm_config(mock_config):
yield mock_config yield mock_config
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
@pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.") @pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.")
@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)]) @pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)])
@patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm)

View File

@@ -17,6 +17,7 @@
from unittest.mock import MagicMock, PropertyMock, patch from unittest.mock import MagicMock, PropertyMock, patch
import pytest
import torch import torch
from tests.ut.base import TestBase from tests.ut.base import TestBase
@@ -180,6 +181,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
self.patcher_npu_moe_init_routing_custom.stop() self.patcher_npu_moe_init_routing_custom.stop()
self.patcher_npu_moe_token_unpermute.stop() self.patcher_npu_moe_token_unpermute.stop()
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_without_expert_map(self): def test_token_dispatch_without_expert_map(self):
hidden_states = torch.randn(3, 128) hidden_states = torch.randn(3, 128)
topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]]) topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]])
@@ -194,6 +197,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
self.assertEqual(results.group_list_type, 1) self.assertEqual(results.group_list_type, 1)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_with_expert_map(self): def test_token_dispatch_with_expert_map(self):
self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3]) self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3])
hidden_states = torch.randn(3, 128) hidden_states = torch.randn(3, 128)
@@ -209,6 +214,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
self.assertEqual(results.group_list_type, 1) self.assertEqual(results.group_list_type, 1)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_without_quant(self): def test_token_dispatch_without_quant(self):
kwargs = { kwargs = {
"apply_router_weight_on_input": False, "apply_router_weight_on_input": False,
@@ -229,6 +236,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
self.assertEqual(results.group_list_type, 1) self.assertEqual(results.group_list_type, 1)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_with_quant(self): def test_token_dispatch_with_quant(self):
kwargs = { kwargs = {
"apply_router_weight_on_input": False, "apply_router_weight_on_input": False,
@@ -254,6 +263,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
self.assertIsNotNone(results.dynamic_scale) self.assertIsNotNone(results.dynamic_scale)
self.assertEqual(results.group_list_type, 1) self.assertEqual(results.group_list_type, 1)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_combine_with_expert_map(self): def test_token_combine_with_expert_map(self):
hidden_states = torch.randn(6, 128) hidden_states = torch.randn(6, 128)
context_metadata = { context_metadata = {
@@ -265,6 +276,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
hidden_states, context_metadata).routed_out hidden_states, context_metadata).routed_out
self.assertEqual(final_hidden_states.shape, (6, 128)) self.assertEqual(final_hidden_states.shape, (6, 128))
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_combine_without_expert_map(self): def test_token_combine_without_expert_map(self):
hidden_states = torch.randn(6, 128) hidden_states = torch.randn(6, 128)
context_metadata = { context_metadata = {
@@ -277,6 +290,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
self.mock_npu_moe_token_unpermute.assert_called_once() self.mock_npu_moe_token_unpermute.assert_called_once()
self.assertEqual(final_hidden_states.shape, (6, 128)) self.assertEqual(final_hidden_states.shape, (6, 128))
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_with_router_weight(self): def test_token_dispatch_with_router_weight(self):
self.dispatcher.apply_router_weight_on_input = True self.dispatcher.apply_router_weight_on_input = True
hidden_states = torch.randn(3, 128) hidden_states = torch.randn(3, 128)
@@ -381,6 +396,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
num_local_experts=2, num_local_experts=2,
with_quant=False) with_quant=False)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch(self): def test_token_dispatch(self):
hidden_states = torch.randn(8, 16) hidden_states = torch.randn(8, 16)
topk_weights = torch.rand(8, 4) topk_weights = torch.rand(8, 4)
@@ -400,6 +417,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
self.assertIsNotNone(result.group_list) self.assertIsNotNone(result.group_list)
self.assertEqual(result.group_list_type, 1) self.assertEqual(result.group_list_type, 1)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_combine(self): def test_token_combine(self):
hidden_states = torch.randn(16, 16) hidden_states = torch.randn(16, 16)
context_metadata = { context_metadata = {
@@ -419,6 +438,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
self.assertIsNotNone(output) self.assertIsNotNone(output)
self.assertEqual(output.routed_out.shape, (8, 16)) self.assertEqual(output.routed_out.shape, (8, 16))
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_with_quant(self): def test_token_dispatch_with_quant(self):
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2, self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
num_experts=4, num_experts=4,
@@ -444,6 +465,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
self.assertIsNotNone(result.dynamic_scale) self.assertIsNotNone(result.dynamic_scale)
self.assertEqual(result.group_list_type, 1) self.assertEqual(result.group_list_type, 1)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_token_dispatch_with_quant_no_active_tokens(self): def test_token_dispatch_with_quant_no_active_tokens(self):
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2, self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
num_experts=4, num_experts=4,

View File

@@ -18,6 +18,7 @@ import os
from threading import Lock from threading import Lock
from unittest import mock from unittest import mock
import pytest
import torch import torch
from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig, from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig,
VllmConfig) VllmConfig)
@@ -104,6 +105,8 @@ class TestUtils(TestBase):
output_tensor = utils.aligned_16(input_tensor) output_tensor = utils.aligned_16(input_tensor)
self.assertEqual(output_tensor.shape[0], 32) self.assertEqual(output_tensor.shape[0], 32)
@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
def test_enable_custom_op(self): def test_enable_custom_op(self):
result = utils.enable_custom_op() result = utils.enable_custom_op()
self.assertTrue(result) self.assertTrue(result)

View File

@@ -35,6 +35,12 @@ env_variables: dict[str, Callable[[], Any]] = {
# The build type of the package. It can be one of the following values: # The build type of the package. It can be one of the following values:
# Release, Debug, RelWithDebugInfo. If not set, the default value is Release. # Release, Debug, RelWithDebugInfo. If not set, the default value is Release.
"CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"), "CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"),
# Whether to compile custom kernels. If not set, the default value is True.
# If set to False, the custom kernels will not be compiled.
# This configuration option should only be set to False when running UT
# scenarios in an environment without an NPU. Do not set it to False in
# other scenarios.
"COMPILE_CUSTOM_KERNELS": lambda: bool(int(os.getenv("COMPILE_CUSTOM_KERNELS", "1"))),
# The CXX compiler used for compiling the package. If not set, the default # The CXX compiler used for compiling the package. If not set, the default
# value is None, which means the system default CXX compiler will be used. # value is None, which means the system default CXX compiler will be used.
"CXX_COMPILER": lambda: os.getenv("CXX_COMPILER", None), "CXX_COMPILER": lambda: os.getenv("CXX_COMPILER", None),

View File

@@ -88,6 +88,12 @@ class NPUWorker(WorkerBase):
# Additional parameters for compatibility with vllm # Additional parameters for compatibility with vllm
**kwargs): **kwargs):
"""Initialize the worker for Ascend.""" """Initialize the worker for Ascend."""
if not envs_ascend.COMPILE_CUSTOM_KERNELS:
logger.warning(
"COMPILE_CUSTOM_KERNELS is set to False. "
"In most scenarios, without custom kernels, vllm-ascend will not function correctly."
)
# register patch for vllm # register patch for vllm
from vllm_ascend.utils import adapt_patch from vllm_ascend.utils import adapt_patch
adapt_patch() adapt_patch()