diff --git a/.github/workflows/nightly_test_a2.yaml b/.github/workflows/nightly_test_a2.yaml
index bef88138..03aa1ed6 100644
--- a/.github/workflows/nightly_test_a2.yaml
+++ b/.github/workflows/nightly_test_a2.yaml
@@ -49,7 +49,7 @@ jobs:
       fail-fast: false
       matrix:
         test_config:
-          - name: qwen3next
+          - name: qwen3-next
             os: linux-aarch64-a2-4
             tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
           - name: qwen3-32b
diff --git a/tests/e2e/nightly/single_node/models/test_qwen3_next.py b/tests/e2e/nightly/single_node/models/test_qwen3_next.py
index 5fd9d183..7d178727 100644
--- a/tests/e2e/nightly/single_node/models/test_qwen3_next.py
+++ b/tests/e2e/nightly/single_node/models/test_qwen3_next.py
@@ -16,7 +16,7 @@ MODELS = [
 MODES = ["aclgraph"]
 
 TENSOR_PARALLELS = [4]
-MAX_NUM_BATCHED_TOKENS = [1024, 4096, 8192, 32768]
+MAX_NUM_BATCHED_TOKENS = [8192, 32768]
 
 prompts = [
     "San Francisco is a",
@@ -70,7 +70,6 @@ async def test_models(model: str, mode: str, tp_size: int,
         "HCCL_BUFFSIZE": "1024",
         "PYTORCH_NPU_ALLOC_CONF": "expandable_segments:True",
     }
-    compilation_config = {"cudagraph_mode": "FULL_DECODE_ONLY"}
     server_args = [
         "--tensor-parallel-size",
         str(tp_size),
@@ -81,15 +80,14 @@ async def test_models(model: str, mode: str, tp_size: int,
         "--max-num-batched-tokens",
         str(max_num_batched_tokens),
         "--trust-remote-code",
+        "--async-scheduling",
+        "--no-enable-prefix-caching",
+        "--enable-expert-parallel",
         "--gpu-memory-utilization",
         "0.8",
         "--max-num-seqs",
         "64",
     ]
-    if mode == "aclgraph":
-        server_args.extend(
-            ["--compilation-config",
-             json.dumps(compilation_config)])
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
     }