[CI] recover e2e test (#2688)

1. recover the skipped test.
2. remove the pangu eager mode test; it is already covered by the torchair mode test.
3. skip the pangu test until the bug is fixed.

- vLLM version: v0.10.1.1
- vLLM main:
56d04089ef

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-09-02 18:49:17 +08:00
committed by GitHub
parent f023bd52bf
commit 0829b4873f
6 changed files with 8 additions and 45 deletions

View File

@@ -72,22 +72,6 @@ def test_models_distributed_DeepSeek_multistream_moe():
vllm_model.generate_greedy(example_prompts, max_tokens)
def test_models_distributed_pangu():
example_prompts = [
"Hello, my name is",
]
max_tokens = 5
with VllmRunner(snapshot_download("vllm-ascend/pangu-pro-moe-pruing"),
max_model_len=8192,
enforce_eager=True,
dtype="auto",
tensor_parallel_size=2,
distributed_executor_backend="mp",
enable_expert_parallel=True) as vllm_model:
vllm_model.generate_greedy(example_prompts, max_tokens)
def test_models_distributed_Qwen3_W8A8():
example_prompts = [
"Hello, my name is",

View File

@@ -6,7 +6,6 @@ import pytest
from tests.e2e.conftest import VllmRunner
from tests.e2e.model_utils import check_outputs_equal
from vllm_ascend.ascend_config import clear_ascend_config
MODELS = [
# for MHA
@@ -103,8 +102,6 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
gpu_memory_utilization=0.7) as vllm_model:
vllm_output = vllm_model.generate_greedy(INPUT_PROMPTS, max_tokens)
clear_ascend_config()
with VllmRunner(model,
additional_config={
'ascend_scheduler_config': {
@@ -119,8 +116,6 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
prefix_cache_output = vllm_model.generate_greedy(
INPUT_PROMPTS, max_tokens)
clear_ascend_config()
with VllmRunner(model,
additional_config={
'ascend_scheduler_config': {
@@ -136,8 +131,6 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
chunk_prefill_prefix_cache_output = vllm_model.generate_greedy(
INPUT_PROMPTS, max_tokens)
clear_ascend_config()
check_outputs_equal(
outputs_0_lst=vllm_output,
outputs_1_lst=prefix_cache_output,

View File

@@ -22,8 +22,9 @@ Run `pytest tests/multicard/test_torchair_graph_mode.py`.
import os
from typing import Dict
import pytest
from tests.e2e.conftest import VllmRunner
from vllm_ascend.ascend_config import clear_ascend_config
os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
@@ -85,8 +86,6 @@ def test_e2e_deepseekv3_with_torchair():
}
_deepseek_torchair_test_fixture(additional_config)
clear_ascend_config()
def test_e2e_deepseekv3_with_torchair_ms_mla():
additional_config = {
@@ -97,8 +96,6 @@ def test_e2e_deepseekv3_with_torchair_ms_mla():
}
_deepseek_torchair_test_fixture(additional_config)
clear_ascend_config()
def test_e2e_deepseekv3_with_torchair_v1scheduler():
additional_config = {
@@ -108,8 +105,6 @@ def test_e2e_deepseekv3_with_torchair_v1scheduler():
}
_deepseek_torchair_test_fixture(additional_config, use_v1_schduler=True)
clear_ascend_config()
def _pangu_torchair_test_fixture(
additional_config: Dict,
@@ -160,6 +155,7 @@ def _pangu_torchair_test_fixture(
print(f"Generated text: {vllm_output[i][1]!r}")
@pytest.mark.skip("pangu doesn't work, fix me")
def test_e2e_pangu_with_torchair():
additional_config = {
"torchair_graph_config": {
@@ -168,8 +164,6 @@ def test_e2e_pangu_with_torchair():
}
_pangu_torchair_test_fixture(additional_config)
clear_ascend_config()
def _qwen_torchair_test_fixture(
model,
@@ -228,9 +222,6 @@ def _qwen_torchair_test_fixture(
def test_e2e_qwen2_with_torchair():
_qwen_torchair_test_fixture("Qwen/Qwen2.5-0.5B-Instruct", 2, False)
clear_ascend_config()
def test_e2e_qwen3_moe_with_torchair():
_qwen_torchair_test_fixture("Qwen/Qwen3-30B-A3B", 2, True)
clear_ascend_config()