[CI] Update UT CANN version to 8.5.0 for main branch (#6564)
### What this PR does / why we need it?
Update the unit-test (UT) CANN version to 8.5.0.

### Does this PR introduce _any_ user-facing change?
N/A

- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0

---------

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
1
.github/workflows/_unit_test.yaml
vendored
1
.github/workflows/_unit_test.yaml
vendored
@@ -27,6 +27,7 @@ jobs:
|
|||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
SOC_VERSION: ascend910b1
|
SOC_VERSION: ascend910b1
|
||||||
MAX_JOBS: 4
|
MAX_JOBS: 4
|
||||||
|
COMPILE_CUSTOM_KERNELS: 0
|
||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
.github/workflows/pr_test_light.yaml
vendored
2
.github/workflows/pr_test_light.yaml
vendored
@@ -92,7 +92,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
vllm: ${{ matrix.vllm_version }}
|
vllm: ${{ matrix.vllm_version }}
|
||||||
runner: linux-amd64-cpu-8-hk
|
runner: linux-amd64-cpu-8-hk
|
||||||
image: quay.nju.edu.cn/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
|
image: quay.nju.edu.cn/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11
|
||||||
type: pr
|
type: pr
|
||||||
|
|
||||||
e2e-light:
|
e2e-light:
|
||||||
|
|||||||
6
setup.py
6
setup.py
@@ -334,6 +334,8 @@ class cmake_build_ext(build_ext):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def build_extensions(self) -> None:
|
def build_extensions(self) -> None:
|
||||||
|
if not envs.COMPILE_CUSTOM_KERNELS:
|
||||||
|
return
|
||||||
# Ensure that CMake is present and working
|
# Ensure that CMake is present and working
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(["cmake", "--version"])
|
subprocess.check_output(["cmake", "--version"])
|
||||||
@@ -423,7 +425,9 @@ except LookupError:
|
|||||||
# only checks out the commit. In this case, we set a dummy version.
|
# only checks out the commit. In this case, we set a dummy version.
|
||||||
VERSION = "0.0.0"
|
VERSION = "0.0.0"
|
||||||
|
|
||||||
ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]
|
ext_modules = []
|
||||||
|
if envs.COMPILE_CUSTOM_KERNELS:
|
||||||
|
ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]
|
||||||
|
|
||||||
|
|
||||||
def get_path(*filepath) -> str:
|
def get_path(*filepath) -> str:
|
||||||
|
|||||||
@@ -39,7 +39,8 @@ def default_vllm_config():
|
|||||||
with set_current_vllm_config(mock_config):
|
with set_current_vllm_config(mock_config):
|
||||||
yield mock_config
|
yield mock_config
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
@pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.")
|
@pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.")
|
||||||
@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)])
|
@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)])
|
||||||
@patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm)
|
@patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm)
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
from unittest.mock import MagicMock, PropertyMock, patch
|
from unittest.mock import MagicMock, PropertyMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from tests.ut.base import TestBase
|
from tests.ut.base import TestBase
|
||||||
@@ -180,6 +181,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
self.patcher_npu_moe_init_routing_custom.stop()
|
self.patcher_npu_moe_init_routing_custom.stop()
|
||||||
self.patcher_npu_moe_token_unpermute.stop()
|
self.patcher_npu_moe_token_unpermute.stop()
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_without_expert_map(self):
|
def test_token_dispatch_without_expert_map(self):
|
||||||
hidden_states = torch.randn(3, 128)
|
hidden_states = torch.randn(3, 128)
|
||||||
topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]])
|
topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]])
|
||||||
@@ -194,6 +197,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
|
|
||||||
self.assertEqual(results.group_list_type, 1)
|
self.assertEqual(results.group_list_type, 1)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_with_expert_map(self):
|
def test_token_dispatch_with_expert_map(self):
|
||||||
self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3])
|
self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3])
|
||||||
hidden_states = torch.randn(3, 128)
|
hidden_states = torch.randn(3, 128)
|
||||||
@@ -209,6 +214,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
|
|
||||||
self.assertEqual(results.group_list_type, 1)
|
self.assertEqual(results.group_list_type, 1)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_without_quant(self):
|
def test_token_dispatch_without_quant(self):
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"apply_router_weight_on_input": False,
|
"apply_router_weight_on_input": False,
|
||||||
@@ -229,6 +236,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
|
|
||||||
self.assertEqual(results.group_list_type, 1)
|
self.assertEqual(results.group_list_type, 1)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_with_quant(self):
|
def test_token_dispatch_with_quant(self):
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"apply_router_weight_on_input": False,
|
"apply_router_weight_on_input": False,
|
||||||
@@ -254,6 +263,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
self.assertIsNotNone(results.dynamic_scale)
|
self.assertIsNotNone(results.dynamic_scale)
|
||||||
self.assertEqual(results.group_list_type, 1)
|
self.assertEqual(results.group_list_type, 1)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_combine_with_expert_map(self):
|
def test_token_combine_with_expert_map(self):
|
||||||
hidden_states = torch.randn(6, 128)
|
hidden_states = torch.randn(6, 128)
|
||||||
context_metadata = {
|
context_metadata = {
|
||||||
@@ -265,6 +276,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
hidden_states, context_metadata).routed_out
|
hidden_states, context_metadata).routed_out
|
||||||
self.assertEqual(final_hidden_states.shape, (6, 128))
|
self.assertEqual(final_hidden_states.shape, (6, 128))
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_combine_without_expert_map(self):
|
def test_token_combine_without_expert_map(self):
|
||||||
hidden_states = torch.randn(6, 128)
|
hidden_states = torch.randn(6, 128)
|
||||||
context_metadata = {
|
context_metadata = {
|
||||||
@@ -277,6 +290,8 @@ class TestTokenDispatcherWithAllGather(TestBase):
|
|||||||
self.mock_npu_moe_token_unpermute.assert_called_once()
|
self.mock_npu_moe_token_unpermute.assert_called_once()
|
||||||
self.assertEqual(final_hidden_states.shape, (6, 128))
|
self.assertEqual(final_hidden_states.shape, (6, 128))
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_with_router_weight(self):
|
def test_token_dispatch_with_router_weight(self):
|
||||||
self.dispatcher.apply_router_weight_on_input = True
|
self.dispatcher.apply_router_weight_on_input = True
|
||||||
hidden_states = torch.randn(3, 128)
|
hidden_states = torch.randn(3, 128)
|
||||||
@@ -381,6 +396,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
|
|||||||
num_local_experts=2,
|
num_local_experts=2,
|
||||||
with_quant=False)
|
with_quant=False)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch(self):
|
def test_token_dispatch(self):
|
||||||
hidden_states = torch.randn(8, 16)
|
hidden_states = torch.randn(8, 16)
|
||||||
topk_weights = torch.rand(8, 4)
|
topk_weights = torch.rand(8, 4)
|
||||||
@@ -400,6 +417,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
|
|||||||
self.assertIsNotNone(result.group_list)
|
self.assertIsNotNone(result.group_list)
|
||||||
self.assertEqual(result.group_list_type, 1)
|
self.assertEqual(result.group_list_type, 1)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_combine(self):
|
def test_token_combine(self):
|
||||||
hidden_states = torch.randn(16, 16)
|
hidden_states = torch.randn(16, 16)
|
||||||
context_metadata = {
|
context_metadata = {
|
||||||
@@ -419,6 +438,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
|
|||||||
self.assertIsNotNone(output)
|
self.assertIsNotNone(output)
|
||||||
self.assertEqual(output.routed_out.shape, (8, 16))
|
self.assertEqual(output.routed_out.shape, (8, 16))
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_with_quant(self):
|
def test_token_dispatch_with_quant(self):
|
||||||
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
|
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
|
||||||
num_experts=4,
|
num_experts=4,
|
||||||
@@ -444,6 +465,8 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
|
|||||||
self.assertIsNotNone(result.dynamic_scale)
|
self.assertIsNotNone(result.dynamic_scale)
|
||||||
self.assertEqual(result.group_list_type, 1)
|
self.assertEqual(result.group_list_type, 1)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_token_dispatch_with_quant_no_active_tokens(self):
|
def test_token_dispatch_with_quant_no_active_tokens(self):
|
||||||
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
|
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
|
||||||
num_experts=4,
|
num_experts=4,
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import os
|
|||||||
from threading import Lock
|
from threading import Lock
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig,
|
from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig,
|
||||||
VllmConfig)
|
VllmConfig)
|
||||||
@@ -104,6 +105,8 @@ class TestUtils(TestBase):
|
|||||||
output_tensor = utils.aligned_16(input_tensor)
|
output_tensor = utils.aligned_16(input_tensor)
|
||||||
self.assertEqual(output_tensor.shape[0], 32)
|
self.assertEqual(output_tensor.shape[0], 32)
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
|
||||||
def test_enable_custom_op(self):
|
def test_enable_custom_op(self):
|
||||||
result = utils.enable_custom_op()
|
result = utils.enable_custom_op()
|
||||||
self.assertTrue(result)
|
self.assertTrue(result)
|
||||||
|
|||||||
@@ -35,6 +35,12 @@ env_variables: dict[str, Callable[[], Any]] = {
|
|||||||
# The build type of the package. It can be one of the following values:
|
# The build type of the package. It can be one of the following values:
|
||||||
# Release, Debug, RelWithDebugInfo. If not set, the default value is Release.
|
# Release, Debug, RelWithDebugInfo. If not set, the default value is Release.
|
||||||
"CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"),
|
"CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"),
|
||||||
|
# Whether to compile custom kernels. If not set, the default value is True.
|
||||||
|
# If set to 0 (i.e. False), the custom kernels will not be compiled. The value is parsed with int(), so use 0/1 rather than the strings "False"/"True".
|
||||||
|
# This configuration option should only be set to False when running UT
|
||||||
|
# scenarios in an environment without an NPU. Do not set it to False in
|
||||||
|
# other scenarios.
|
||||||
|
"COMPILE_CUSTOM_KERNELS": lambda: bool(int(os.getenv("COMPILE_CUSTOM_KERNELS", "1"))),
|
||||||
# The CXX compiler used for compiling the package. If not set, the default
|
# The CXX compiler used for compiling the package. If not set, the default
|
||||||
# value is None, which means the system default CXX compiler will be used.
|
# value is None, which means the system default CXX compiler will be used.
|
||||||
"CXX_COMPILER": lambda: os.getenv("CXX_COMPILER", None),
|
"CXX_COMPILER": lambda: os.getenv("CXX_COMPILER", None),
|
||||||
|
|||||||
@@ -88,6 +88,12 @@ class NPUWorker(WorkerBase):
|
|||||||
# Additional parameters for compatibility with vllm
|
# Additional parameters for compatibility with vllm
|
||||||
**kwargs):
|
**kwargs):
|
||||||
"""Initialize the worker for Ascend."""
|
"""Initialize the worker for Ascend."""
|
||||||
|
if not envs_ascend.COMPILE_CUSTOM_KERNELS:
|
||||||
|
logger.warning(
|
||||||
|
"COMPILE_CUSTOM_KERNELS is set to False. "
|
||||||
|
"In most scenarios, without custom kernels, vllm-ascend will not function correctly."
|
||||||
|
)
|
||||||
|
|
||||||
# register patch for vllm
|
# register patch for vllm
|
||||||
from vllm_ascend.utils import adapt_patch
|
from vllm_ascend.utils import adapt_patch
|
||||||
adapt_patch()
|
adapt_patch()
|
||||||
|
|||||||
Reference in New Issue
Block a user