From bd4fb871c6155658e8b60308a0221dcd35456226 Mon Sep 17 00:00:00 2001 From: zhangyiming <34808445+menogrey@users.noreply.github.com> Date: Wed, 24 Dec 2025 10:41:32 +0800 Subject: [PATCH] [CI] Add skipped testcases. (#5254) ### What this PR does / why we need it? Some E2E testcases are not in our CI workflow; this PR adds them back. - vLLM version: release/v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: menogrey <1299267905@qq.com> --- .github/workflows/_e2e_test.yaml | 2 ++ .../singlecard/test_multistream_overlap_shared_expert.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 5ee2fd25..97ccc4a6 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -117,6 +117,7 @@ jobs: pytest -sv --durations=0 tests/e2e/singlecard/pooling/ pytest -sv --durations=0 tests/e2e/singlecard/compile/test_norm_quant_fusion.py pytest -sv --durations=0 tests/e2e/singlecard/test_cross_layer_attn_model.py + pytest -sv --durations=0 tests/e2e/singlecard/test_multistream_overlap_shared_expert.py # ------------------------------------ v1 spec decode test ------------------------------------ # pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py @@ -219,6 +220,7 @@ jobs: pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_dense_fc1_tp2 pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_dense_prefetch_mlp_weight_tp2 pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2 + pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_v2_lite_fc1_tp2 pytest -sv --durations=0 tests/e2e/multicard/test_prefix_caching.py pytest -sv --durations=0 tests/e2e/multicard/test_pipeline_parallel.py diff 
--git a/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py b/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py index eb00cc67..caf09bd9 100644 --- a/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +++ b/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py @@ -48,9 +48,11 @@ def test_models_with_multistream_overlap_shared_expert( model, max_model_len=1024, enforce_eager=True, + cudagraph_capture_sizes=[4, 8, 16, 32], additional_config={ "multistream_overlap_shared_expert": True, }, + quantization="ascend", ) as runner: vllm_moe_ms_eager_outputs = runner.model.generate( prompts, sampling_params) @@ -58,9 +60,11 @@ def test_models_with_multistream_overlap_shared_expert( with VllmRunner( model, max_model_len=1024, + cudagraph_capture_sizes=[4, 8, 16, 32], additional_config={ "multistream_overlap_shared_expert": True, }, + quantization="ascend", ) as runner: vllm_moe_ms_aclgraph_outputs = runner.model.generate( prompts, sampling_params) @@ -69,6 +73,8 @@ def test_models_with_multistream_overlap_shared_expert( model, max_model_len=1024, enforce_eager=True, + cudagraph_capture_sizes=[4, 8, 16, 32], + quantization="ascend", ) as runner: vllm_eager_outputs = runner.model.generate(prompts, sampling_params)