[CI] refactor: standardize test case naming convention (#5243)

### What this PR does / why we need it?
- Standardize test case naming in `vllm-ascend/tests/e2e/multicard/` to
follow the `<model>_<feature>_<distributed>` convention (see the example below).

- vLLM version: release/v0.13.0
- vLLM main:
ad32e3e19c
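
For example, `test_models_distributed_Qwen3_W4A8DYNAMIC` becomes `test_qwen3_w4a8_dynamic_tp2` (model `qwen3`, feature `w4a8_dynamic`, distributed suffix `tp2`). As a rough sketch of the pattern (a hypothetical checker, not part of this PR; single-process helpers keep names without a distributed suffix):

```python
import re

# Hypothetical convention check (illustration only, not part of this PR):
# names read test_<model>_<feature>[_<distributed>], all lowercase
# snake_case, with an optional tp/dp/pp/ep world-size suffix (e.g. tp2).
NAME_RE = re.compile(r"^test_[a-z0-9]+(?:_[a-z0-9]+)*$")

def follows_convention(name: str) -> bool:
    """Reject CamelCase model names and ALLCAPS feature tags."""
    return NAME_RE.fullmatch(name) is not None

assert follows_convention("test_qwen3_w4a8_dynamic_tp2")  # new style
assert not follows_convention("test_models_distributed_Qwen3_W4A8DYNAMIC")  # old style
```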

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Author: SILONG ZENG
Date: 2025-12-23 14:13:42 +08:00
Committed by: GitHub
Parent: 592cfb6a6f
Commit: 29a93daa82

10 changed files with 33 additions and 33 deletions


@@ -209,13 +209,13 @@ jobs:
 #pytest -sv --durations=0 tests/e2e/multicard/test_ilama_lora_tp2.py
 # To avoid oom, we need to run the test in a single process.
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_w4a8_dynamic_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_moe_sp_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_moe_fc2_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_dense_fc1_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_dense_prefetch_mlp_weight_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2
 pytest -sv --durations=0 tests/e2e/multicard/test_prefix_caching.py
 pytest -sv --durations=0 tests/e2e/multicard/test_pipeline_parallel.py
@@ -288,8 +288,8 @@ jobs:
 env:
   VLLM_WORKER_MULTIPROC_METHOD: spawn
 run: |
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2
+pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_kimi_k2_thinking_w4a16_tp4
 pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel_tp2.py
 pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_basic.py
 pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_accuracy.py


@@ -134,7 +134,7 @@ def _run_worker_process(
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [4, 36])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})
-def test_aclgraph_capture_replay_metrics_dp2(
+def test_models_aclgraph_capture_replay_metrics_dp2(
     model: str,
     max_tokens: int,
     monkeypatch: pytest.MonkeyPatch,


@@ -38,7 +38,7 @@ MODELS = [
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})
-def test_qwen_inference_dp2(model, max_tokens):
+def test_qwen3_inference_dp2(model, max_tokens):
     moe_models = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/Qwen3-30B-A3B-W8A8"]
     quantization_models = ["vllm-ascend/Qwen3-30B-A3B-W8A8"]
     script = "examples/offline_data_parallel.py"


@@ -15,7 +15,7 @@ MODELS = ["Qwen/Qwen3-0.6B"]
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3"})
-def test_qwen_inference_dp2_tp2(model, max_tokens):
+def test_qwen3_inference_dp2_tp2(model, max_tokens):
     script = "examples/offline_data_parallel.py"
     env = os.environ.copy()

@@ -37,7 +37,7 @@ DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 @pytest.mark.parametrize("model", MODELS)
 @patch.dict(os.environ, {"HCCL_BUFFSIZE": "500"})
-def test_qwen_external_launcher(model):
+def test_qwen3_external_launcher(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -78,7 +78,7 @@ def test_qwen_external_launcher(model):
 @pytest.mark.parametrize("model", MOE_MODELS)
-def test_qwen_moe_external_launcher_ep(model):
+def test_qwen3_moe_external_launcher_ep_tp2(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -109,7 +109,7 @@ def test_qwen_moe_external_launcher_ep(model):
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
-def test_qwen_external_launcher_with_sleepmode():
+def test_qwen3_external_launcher_with_sleepmode():
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -154,7 +154,7 @@ def test_qwen_external_launcher_with_sleepmode():
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
-def test_qwen_external_launcher_with_sleepmode_level2():
+def test_qwen3_external_launcher_with_sleepmode_level2():
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -210,7 +210,7 @@ def test_qwen_external_launcher_with_sleepmode_level2():
     "VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE": "1",
     "HCCL_BUFFSIZE": "500"
 })
-def test_qwen_external_launcher_with_matmul_allreduce(model):
+def test_qwen3_external_launcher_with_matmul_allreduce(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"


@@ -29,7 +29,7 @@ from tests.e2e.conftest import VllmRunner
 from tests.e2e.model_utils import check_outputs_equal
-def test_qwen_moe_with_full_decode_only():
+def test_qwen3_moe_full_decode_only_tp2():
     if 'HCCL_OP_EXPANSION_MODE' in os.environ:
         del os.environ['HCCL_OP_EXPANSION_MODE']
     prompts = [
@@ -75,7 +75,7 @@ def test_qwen_moe_with_full_decode_only():
 )
-def test_qwen_moe_with_full():
+def test_qwen3_moe_full_graph_tp2():
     if 'HCCL_OP_EXPANSION_MODE' in os.environ:
         del os.environ['HCCL_OP_EXPANSION_MODE']
     prompts = [


@@ -41,7 +41,7 @@ from tests.e2e.conftest import VllmRunner
     "TASK_QUEUE_ENABLE": "1",
     "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP": "1"
 })
-def test_deepseek_moe_fused_allgather_ep():
+def test_deepseek_v3_moe_fused_allgather_ep_tp2():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
@@ -62,7 +62,7 @@ def test_deepseek_moe_fused_allgather_ep():
     "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
     "TASK_QUEUE_ENABLE": "1"
 })
-def test_deepseek_moe_fused_alltoall_ep():
+def test_deepseek_v3_moe_fused_alltoall_ep_tp2():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)


@@ -34,7 +34,7 @@ os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 QWEN_DENSE_MODELS = [
-    "vllm-ascend/Qwen3-8B-W8A8",
+    "vllm-ascend/Qwen3-0.6B-W8A8",
 ]
 QWEN_W4A8_MODELS = [
@@ -50,7 +50,7 @@ KIMI_W4A16_MODELS = [
 ]
-def test_models_distributed_DeepSeek_multistream_moe():
+def test_deepseek_multistream_moe_tp2():
     example_prompts = [
         "Hello, my name is",
     ]
@@ -70,7 +70,7 @@ def test_models_distributed_DeepSeek_multistream_moe():
 @pytest.mark.parametrize("model", QWEN_W4A8_MODELS)
-def test_models_distributed_Qwen3_W4A8DYNAMIC(model):
+def test_qwen3_w4a8_dynamic_tp2(model):
     prompts = [
         "Hello, my name is",
     ]
@@ -85,7 +85,7 @@ def test_models_distributed_Qwen3_W4A8DYNAMIC(model):
     vllm_model.generate_greedy(prompts, max_tokens)
-def test_sp_for_qwen3_moe() -> None:
+def test_qwen3_moe_sp_tp2() -> None:
     example_prompts = [
         "Hello, my name is",
     ]
@@ -108,7 +108,7 @@ def test_sp_for_qwen3_moe() -> None:
 @pytest.mark.parametrize("model", DEEPSEEK_W4A8_MODELS)
 @patch.dict(os.environ, {"HCCL_BUFFSIZE": "2048"})
-def test_deepseek_w4a8_accuracy(model):
+def test_deepseek_w4a8_accuracy_tp2(model):
     prompts = [
         "Hello, my name is", "The president of the United States is",
         "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs"
@@ -140,7 +140,7 @@ def test_deepseek_w4a8_accuracy(model):
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
 @patch.dict(os.environ, {"VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE": "1"})
-def test_fc2_for_qwen3_moe() -> None:
+def test_qwen3_moe_fc2_tp2() -> None:
     example_prompts = [
         "Hello, my name is",
     ]
@@ -159,7 +159,7 @@ def test_fc2_for_qwen3_moe() -> None:
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
-def test_models_distributed_deepseek_v2_lite_with_flashcomm_v1() -> None:
+def test_deepseek_v2_lite_fc1_tp2() -> None:
     example_prompts = [
         "test" * 1001,
     ]
@@ -180,7 +180,7 @@ def test_models_distributed_deepseek_v2_lite_with_flashcomm_v1() -> None:
 @pytest.mark.parametrize("model", QWEN_DENSE_MODELS)
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE": "1"})
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
-def test_models_distributed_Qwen_Dense_with_flashcomm_v1(model):
+def test_qwen3_dense_fc1_tp2(model):
     example_prompts = [
         "Hello, my name is",
     ]
@@ -200,7 +200,7 @@ def test_models_distributed_Qwen_Dense_with_flashcomm_v1(model):
 @pytest.mark.parametrize("model", QWEN_DENSE_MODELS)
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE": "1"})
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_PREFETCH_MLP": "1"})
-def test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight(model):
+def test_qwen3_dense_prefetch_mlp_weight_tp2(model):
     example_prompts = [
         "Hello, my name is",
     ]
@@ -218,7 +218,7 @@ def test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight(model):
 @pytest.mark.parametrize("model", KIMI_W4A16_MODELS)
-def test_models_distributed_Kimi_K2_Thinking_W4A16(model):
+def test_kimi_k2_thinking_w4a16_tp4(model):
     example_prompts = [
         "Hello, my name is",
     ]


@@ -31,7 +31,7 @@ MODELS = ["Qwen/Qwen3-30B-A3B"]
 @pytest.mark.parametrize("model", MODELS)
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
-def test_qwen_offline_weight_load_and_sleepmode(model):
+def test_qwen3_offline_load_and_sleepmode_tp2(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"


@@ -44,4 +44,4 @@ def test_models_pp2(model: str, tp_size: int, pp_size: int,
             pipeline_parallel_size=pp_size,
             distributed_executor_backend=distributed_executor_backend,
             gpu_memory_utilization=0.7) as vllm_model:
-        vllm_model.generate_greedy(prompts, 64)
\ No newline at end of file
+        vllm_model.generate_greedy(prompts, 64)