[CI] drop ascend scheduler test (#4582)

Let's drop the ascend scheduler test first to ensure all functionality works without it. - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-12-01 20:33:50 +08:00
parent 203b4e6777
commit 27b09ca9b9
28 changed files with 53 additions and 376 deletions
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -48,27 +48,26 @@ def mtp_correctness(sampling_config: SamplingParams,
    if graph_mode == CUDAGraphMode.FULL:
        graph_mode_str = "FULL_DECODE_ONLY"

-    with VllmRunner(
-            model_name,
-            tensor_parallel_size=1,
-            max_num_seqs=256,
-            gpu_memory_utilization=0.7,
-            distributed_executor_backend="mp",
-            enable_expert_parallel=True,
-            speculative_config={
-                "method": "deepseek_mtp",
-                "num_speculative_tokens": num_speculative_tokens,
-                "disable_padded_drafter_batch": disable_padded_drafter_batch,
-            },
-            enforce_eager=enforce_eager,
-            max_model_len=2000,
-            compilation_config=CompilationConfig(
-                cudagraph_mode=graph_mode_str,
-                cudagraph_capture_sizes=[12],
-            ),
-            additional_config={"ascend_scheduler_config": {
-                "enabled": False
-            }}) as spec_llm:
+    with VllmRunner(model_name,
+                    tensor_parallel_size=1,
+                    max_num_seqs=256,
+                    gpu_memory_utilization=0.7,
+                    distributed_executor_backend="mp",
+                    enable_expert_parallel=True,
+                    speculative_config={
+                        "method":
+                        "deepseek_mtp",
+                        "num_speculative_tokens":
+                        num_speculative_tokens,
+                        "disable_padded_drafter_batch":
+                        disable_padded_drafter_batch,
+                    },
+                    enforce_eager=enforce_eager,
+                    max_model_len=2000,
+                    compilation_config=CompilationConfig(
+                        cudagraph_mode=graph_mode_str,
+                        cudagraph_capture_sizes=[12],
+                    )) as spec_llm:
        spec_outputs = spec_llm.generate(example_prompts, sampling_config)

    matches = 0