diff --git a/.github/workflows/misc/model_list.json b/.github/workflows/misc/model_list.json
index a45bebcc..c82d9671 100644
--- a/.github/workflows/misc/model_list.json
+++ b/.github/workflows/misc/model_list.json
@@ -1,6 +1,7 @@
 {
     "models": [
       "AngelSlim/Qwen3-32B_eagle3",
+      "AngelSlim/Qwen3-a3B_eagle3",
       "Anionex/Qwen3-1.7B-W4A8-V1",
       "ArthurZ/ilama-3.2-1B",
       "BAAI/bge-base-en-v1.5",
@@ -207,10 +208,12 @@
       "vllm-ascend/Qwen3-30B-A3B-Puring",
       "vllm-ascend/Qwen3-30B-A3B-W8A8",
       "vllm-ascend/Qwen3-30B-A3B-W8A8-Pruning",
+      "vllm-ascend/Qwen3-30B-A3B-W8A8-QuaRot",
       "vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w8a8",
       "vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w4a8",
       "vllm-ascend/Qwen3-32B-W4A4",
       "vllm-ascend/Qwen3-32B-W8A8",
+      "vllm-ascend/Qwen3-32B-W8A8-QuaRot",
       "vllm-ascend/Qwen3-8B",
       "vllm-ascend/Qwen3-8B-W4A8",
       "vllm-ascend/Qwen3-8B-W8A8",
diff --git a/.github/workflows/scripts/config.yaml b/.github/workflows/scripts/config.yaml
index 6b7bd2a8..e009841d 100644
--- a/.github/workflows/scripts/config.yaml
+++ b/.github/workflows/scripts/config.yaml
@@ -54,7 +54,7 @@ e2e-singlecard:
   - name: tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py
     estimated_time: 1500
   - name: tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py
-    estimated_time: 1800
+    estimated_time: 600
   - name: tests/e2e/singlecard/model_runner_v2/test_basic.py
     estimated_time: 80
     is_skipped: true
@@ -101,6 +101,8 @@ e2e-multicard-2-cards:
     estimated_time: 60
   - name: tests/e2e/multicard/2-cards/test_llama32_lora_tp2.py
     estimated_time: 223
+  - name: tests/e2e/multicard/2-cards/spec_decode/test_quarot_eagle.py
+    estimated_time: 600
   # Run the test in a separate step to avoid oom
   - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2
     estimated_time: 100
diff --git a/tests/e2e/multicard/2-cards/spec_decode/test_quarot_eagle.py b/tests/e2e/multicard/2-cards/spec_decode/test_quarot_eagle.py
new file mode 100644
index 00000000..cf4bbd38
--- /dev/null
+++ b/tests/e2e/multicard/2-cards/spec_decode/test_quarot_eagle.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+# Run `pytest tests/e2e/multicard/2-cards/spec_decode/test_quarot_eagle.py`.
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import pytest
+from transformers import AutoTokenizer
+from vllm import SamplingParams
+from vllm.v1.metrics.reader import Counter, Vector
+
+from tests.e2e.conftest import VllmRunner
+
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
+K = 4  # Number of speculative tokens
+TOL = 0.06  # Absolute tolerance for acceptance comparison
+
+
+# Here, the two selected models correspond to two scenarios.
+# The 32B draft model comes with its own embedding,
+# while the 30B draft model shares the embedding of the target model.
+MODELS: dict[str, dict] = {
+    "32B": {
+        "target": {
+            "float": "Qwen/Qwen3-32B",
+            "w8a8": "vllm-ascend/Qwen3-32B-W8A8-QuaRot",
+        },
+        "draft": "RedHatAI/Qwen3-32B-speculator.eagle3",
+    },
+    "30B": {
+        "target": {
+            "float": "Qwen/Qwen3-30B-A3B",
+            "w8a8": "vllm-ascend/Qwen3-30B-A3B-W8A8-QuaRot",
+        },
+        "draft": "AngelSlim/Qwen3-a3B_eagle3",
+    },
+}
+
+
+def _build_prompts(target_model: str) -> list[str]:
+    # These prompts were formed by taking one from each category of mt-bench.
+    # Although there are still some differences from the processing method of
+    # vllm serve bench, it does not affect this test.
+    # it is possible to directly take from mt-bench or further
+    # call vllm bench serve for direct testing later.
+    prompts = [
+        {
+            "role": "user",
+            "content": "Compose an engaging travel blog post about a recent trip to Hawaii, "
+            "highlighting cultural experiences and must-see attractions.",
+        },
+        {
+            "role": "user",
+            "content": "Pretend yourself to be Elon Musk in all the following conversations. "
+            "Speak like Elon Musk as much as possible. Why do we need to go to Mars?",
+        },
+        {
+            "role": "user",
+            "content": "Imagine you are participating in a race with a group of people. "
+            "If you have just overtaken the second person, what's your current position? "
+            "Where is the person you just overtook?",
+        },
+        {
+            "role": "user",
+            "content": "The vertices of a triangle are at points (0, 0), (-1, 1), and (3, 3). "
+            "What is the area of the triangle?",
+        },
+        {
+            "role": "user",
+            "content": "Develop a Python program that reads all the text files under a directory "
+            "and returns top-5 words with the most number of occurrences.",
+        },
+        {
+            "role": "user",
+            "content": "Evaluate the following movie reviews on a scale of 1 to 5, with 1 being very negative, "
+            "3 being neutral, and 5 being very positive:\n1. This movie released on Nov. 18, 2019, was phenomenal. "
+            "The cinematography, the acting, the plot - everything was top-notch.\n"
+            "2. Never before have I been so disappointed with a movie. The plot was predictable and the characters "
+            "were one-dimensional. In my opinion, this movie is the worst one to have been released in 2022.\n"
+            "3. The movie was okay. There were some parts I  enjoyed, but there were also parts that felt lackluster. "
+            "This is a movie that was released in Feb 2018 and seems to be quite ordinary.\n"
+            "Return the answer as a JSON array of integers.",
+        },
+        {
+            "role": "user",
+            "content": "In the field of quantum physics, what is superposition, "
+            "and how does it relate to the phenomenon of quantum entanglement?",
+        },
+        {
+            "role": "user",
+            "content": "Provide insights into the correlation between economic indicators such as GDP, "
+            "inflation, and unemployment rates. Explain how fiscal and monetary policies affect those indicators.",
+        },
+    ]
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        target_model,
+        trust_remote_code=True,
+    )
+
+    prompts_with_template: list[str] = [
+        tokenizer.apply_chat_template(
+            [prompt],
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+        for prompt in prompts
+    ]
+    return prompts_with_template
+
+
+def _run_model(
+    llm_kwargs: dict,
+    prompts: list[str],
+    sampling_params: SamplingParams,
+) -> list[Any]:
+    with VllmRunner(**llm_kwargs) as llm:
+        _ = llm.generate(prompts, sampling_params)
+        metrics = llm.model.get_metrics()
+
+    return metrics
+
+
+def _compute_acceptance(metrics: list[Any]) -> list[float | int]:
+    num_drafts = 0
+    num_accepted_tokens_per_pos = [0] * K
+
+    for metric in metrics:
+        if metric.name == "vllm:spec_decode_num_drafts":
+            assert isinstance(metric, Counter)
+            num_drafts += metric.value
+
+        elif metric.name == "vllm:spec_decode_num_accepted_tokens_per_pos":
+            assert isinstance(metric, Vector)
+            for i, v in enumerate(metric.values):
+                num_accepted_tokens_per_pos[i] += v
+
+    acceptance_per_pos = [
+        num_accepted_tokens / num_drafts if num_drafts > 0 else 0.0
+        for num_accepted_tokens in num_accepted_tokens_per_pos
+    ]
+
+    return acceptance_per_pos
+
+
+@pytest.mark.parametrize("model", ["32B", "30B"])
+def test_quarot_eagle_acceptance_tp2(model: str):
+    target_model = MODELS[model]["target"]["float"]
+    draft_model = MODELS[model]["draft"]
+
+    prompts = _build_prompts(target_model)
+
+    sampling_params = SamplingParams(
+        temperature=0,
+        ignore_eos=False,
+        max_tokens=512,
+    )
+
+    llm_kwargs = dict(
+        model_name=target_model,
+        enforce_eager=True,
+        max_model_len=4096,
+        disable_log_stats=False,
+        tensor_parallel_size=2,
+        distributed_executor_backend="mp",
+        gpu_memory_utilization=0.9,
+        speculative_config={
+            "enforce_eager": True,
+            "method": "eagle3",
+            "model": draft_model,
+            "num_speculative_tokens": K,
+        },
+    )
+
+    # Run the float model and the quarot model,
+    # and then compare their acceptance rates at each position.
+    ref_metrics = _run_model(llm_kwargs, prompts, sampling_params)
+    ref_acceptance = _compute_acceptance(ref_metrics)
+
+    llm_kwargs["model_name"] = MODELS[model]["target"]["w8a8"]
+    llm_kwargs["quantization"] = "ascend"
+
+    quarot_metrics = _run_model(llm_kwargs, prompts, sampling_params)
+    quarot_acceptance = _compute_acceptance(quarot_metrics)
+
+    match = all(abs(i - j) <= TOL for i, j in zip(ref_acceptance, quarot_acceptance))
+
+    assert match, (
+        f"\nref_acceptance_per_pos: {[round(_, 4) for _ in ref_acceptance]}"
+        f"\nquarot_acceptance_per_pos: {[round(_, 4) for _ in quarot_acceptance]}"
+    )