[Test] Add new e2e test using deepseek-v2-lite in GE graph mode (#3937)
### What this PR does / why we need it?
The current test cases lack end-to-end (e2e) testing for the
deepseek-v2-lite network in ge graph mode.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
---------
Signed-off-by: CodeNine-CJ <chenjian343@huawei.com>
This commit is contained in:
8
.github/workflows/_e2e_test.yaml
vendored
8
.github/workflows/_e2e_test.yaml
vendored
@@ -74,8 +74,8 @@ jobs:
|
|||||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||||
if: ${{ inputs.type == 'light' }}
|
if: ${{ inputs.type == 'light' }}
|
||||||
run: |
|
run: |
|
||||||
pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
# pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
||||||
pytest -sv tests/e2e/singlecard/test_quantization.py
|
# pytest -sv tests/e2e/singlecard/test_quantization.py
|
||||||
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
||||||
|
|
||||||
- name: Run e2e test
|
- name: Run e2e test
|
||||||
@@ -171,6 +171,7 @@ jobs:
|
|||||||
if: ${{ inputs.type == 'light' }}
|
if: ${{ inputs.type == 'light' }}
|
||||||
run: |
|
run: |
|
||||||
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
|
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
|
||||||
|
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair
|
||||||
|
|
||||||
- name: Run vllm-project/vllm-ascend test (full)
|
- name: Run vllm-project/vllm-ascend test (full)
|
||||||
env:
|
env:
|
||||||
@@ -178,6 +179,7 @@ jobs:
|
|||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
if: ${{ inputs.type == 'full' }}
|
if: ${{ inputs.type == 'full' }}
|
||||||
run: |
|
run: |
|
||||||
|
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
|
||||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||||
pytest -sv tests/e2e/multicard/test_expert_parallel.py
|
pytest -sv tests/e2e/multicard/test_expert_parallel.py
|
||||||
# pytest -sv tests/e2e/multicard/test_external_launcher.py
|
# pytest -sv tests/e2e/multicard/test_external_launcher.py
|
||||||
@@ -199,4 +201,4 @@ jobs:
|
|||||||
pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
|
pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
|
||||||
pytest -sv tests/e2e/multicard/test_prefix_caching.py
|
pytest -sv tests/e2e/multicard/test_prefix_caching.py
|
||||||
pytest -sv tests/e2e/multicard/test_qwen3_moe.py
|
pytest -sv tests/e2e/multicard/test_qwen3_moe.py
|
||||||
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
|
|
||||||
|
|||||||
@@ -225,3 +225,66 @@ def test_e2e_qwen2_with_torchair():
|
|||||||
|
|
||||||
def test_e2e_qwen3_moe_with_torchair():
    """Run the shared Qwen torchair fixture against Qwen/Qwen3-30B-A3B."""
    model_name = "Qwen/Qwen3-30B-A3B"
    # NOTE(review): the meaning of the positional arguments (2, True) is
    # defined by _qwen_torchair_test_fixture, which is not visible here.
    _qwen_torchair_test_fixture(model_name, 2, True)
|
||||||
|
|
||||||
|
|
||||||
|
# test deepseek-v2-lite
|
||||||
|
def _deepseek_v2_lite_torchair_test_fixure(
    additional_config: Dict,
    *,
    tensor_parallel_size=2,
    use_v1_schduler=False,
):
    """Generate with DeepSeek-V2-Lite and check that every output is non-empty.

    Args:
        additional_config: Base ``additional_config`` mapping forwarded to
            ``VllmRunner``. It is shallow-copied, so the caller's dict is
            never modified.
        tensor_parallel_size: Forwarded to ``VllmRunner`` unchanged.
        use_v1_schduler: When false (the default), the keys
            ``"ascend_scheduler_config": {"enable": True}`` and
            ``"refresh": True`` are added to the config before it is passed
            on. When true, the config is forwarded as given.

    Raises:
        AssertionError: If any generated text is empty after stripping
            whitespace.
    """
    example_prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]

    # Work on a copy: only top-level keys are added below, so a shallow copy
    # is enough to keep the caller's mapping untouched.
    merged_config = dict(additional_config)
    if not use_v1_schduler:
        merged_config.update({
            "ascend_scheduler_config": {
                "enable": True,
            },
            "refresh": True,
        })

    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype="half",
            tensor_parallel_size=tensor_parallel_size,
            distributed_executor_backend="mp",
            additional_config=merged_config,
    ) as vllm_model:
        vllm_output = vllm_model.generate_greedy(example_prompts, 5)

    # NOTE: the original author's note states that this checkpoint behaves
    # like randomly initialised weights with 2 hidden layers, so the outputs
    # are not compared against golden text; only non-emptiness is asserted.
    # NOTE(review): confirm this note actually applies to the
    # "deepseek-ai/DeepSeek-V2-Lite" checkpoint used above.
    for i, output in enumerate(vllm_output):
        # Index 1 of each result is used as the generated text.
        generated_text = output[1]
        assert generated_text.strip(), f"The {i}-th output is null, failed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_e2e_deepseekv2lite_with_torchair():
    """Run the DeepSeek-V2-Lite fixture with the torchair graph enabled.

    The fixture's default keyword arguments are used.
    """
    torchair_graph_config = {"enabled": True}
    _deepseek_v2_lite_torchair_test_fixure(
        {"torchair_graph_config": torchair_graph_config})
|
||||||
|
|
||||||
|
|
||||||
|
def test_e2e_deepseekv2lite_with_torchair_v1scheduler():
    """Run the DeepSeek-V2-Lite fixture with torchair graph and ``use_v1_schduler=True``."""
    torchair_graph_config = {"enabled": True}
    config = {"torchair_graph_config": torchair_graph_config}
    _deepseek_v2_lite_torchair_test_fixure(config, use_v1_schduler=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user