[CI] refactor: standardize test case naming convention (#5243)
### What this PR does / why we need it?
- Standardize test case naming in `vllm-ascend/tests/e2e/multicard/` to
follow the `<model>_<feature>_<distributed>` convention, e.g.
`test_sp_for_qwen3_moe` becomes `test_qwen3_moe_sp_tp2`.
- vLLM version: release/v0.13.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
This commit is contained in:
@@ -34,7 +34,7 @@ os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
|
||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||
|
||||
QWEN_DENSE_MODELS = [
|
||||
"vllm-ascend/Qwen3-8B-W8A8",
|
||||
"vllm-ascend/Qwen3-0.6B-W8A8",
|
||||
]
|
||||
|
||||
QWEN_W4A8_MODELS = [
|
||||
@@ -50,7 +50,7 @@ KIMI_W4A16_MODELS = [
|
||||
]
|
||||
|
||||
|
||||
def test_models_distributed_DeepSeek_multistream_moe():
|
||||
def test_deepseek_multistream_moe_tp2():
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
@@ -70,7 +70,7 @@ def test_models_distributed_DeepSeek_multistream_moe():
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", QWEN_W4A8_MODELS)
|
||||
def test_models_distributed_Qwen3_W4A8DYNAMIC(model):
|
||||
def test_qwen3_w4a8_dynamic_tp2(model):
|
||||
prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
@@ -85,7 +85,7 @@ def test_models_distributed_Qwen3_W4A8DYNAMIC(model):
|
||||
vllm_model.generate_greedy(prompts, max_tokens)
|
||||
|
||||
|
||||
def test_sp_for_qwen3_moe() -> None:
|
||||
def test_qwen3_moe_sp_tp2() -> None:
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
@@ -108,7 +108,7 @@ def test_sp_for_qwen3_moe() -> None:
|
||||
|
||||
@pytest.mark.parametrize("model", DEEPSEEK_W4A8_MODELS)
|
||||
@patch.dict(os.environ, {"HCCL_BUFFSIZE": "2048"})
|
||||
def test_deepseek_w4a8_accuracy(model):
|
||||
def test_deepseek_w4a8_accuracy_tp2(model):
|
||||
prompts = [
|
||||
"Hello, my name is", "The president of the United States is",
|
||||
"vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs"
|
||||
@@ -140,7 +140,7 @@ def test_deepseek_w4a8_accuracy(model):
|
||||
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE": "1"})
|
||||
def test_fc2_for_qwen3_moe() -> None:
|
||||
def test_qwen3_moe_fc2_tp2() -> None:
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
@@ -159,7 +159,7 @@ def test_fc2_for_qwen3_moe() -> None:
|
||||
|
||||
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
|
||||
def test_models_distributed_deepseek_v2_lite_with_flashcomm_v1() -> None:
|
||||
def test_deepseek_v2_lite_fc1_tp2() -> None:
|
||||
example_prompts = [
|
||||
"test" * 1001,
|
||||
]
|
||||
@@ -180,7 +180,7 @@ def test_models_distributed_deepseek_v2_lite_with_flashcomm_v1() -> None:
|
||||
@pytest.mark.parametrize("model", QWEN_DENSE_MODELS)
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE": "1"})
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
|
||||
def test_models_distributed_Qwen_Dense_with_flashcomm_v1(model):
|
||||
def test_qwen3_dense_fc1_tp2(model):
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
@@ -200,7 +200,7 @@ def test_models_distributed_Qwen_Dense_with_flashcomm_v1(model):
|
||||
@pytest.mark.parametrize("model", QWEN_DENSE_MODELS)
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE": "1"})
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_PREFETCH_MLP": "1"})
|
||||
def test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight(model):
|
||||
def test_qwen3_dense_prefetch_mlp_weight_tp2(model):
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
@@ -218,7 +218,7 @@ def test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight(model):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", KIMI_W4A16_MODELS)
|
||||
def test_models_distributed_Kimi_K2_Thinking_W4A16(model):
|
||||
def test_kimi_k2_thinking_w4a16_tp4(model):
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user