diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index f4c6a65e..c4442eeb 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -192,11 +192,11 @@ jobs:
           # To avoid oom, we need to run the test in a single process.
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy
           pytest -sv --durations=0 tests/e2e/multicard/test_prefix_caching.py
           pytest -sv --durations=0 tests/e2e/multicard/test_pipeline_parallel.py
 
@@ -265,7 +265,6 @@ jobs:
           VLLM_USE_MODELSCOPE: True
         run: |
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
           pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
           pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel_tp2.py
 
diff --git a/tests/e2e/multicard/test_offline_inference_distributed.py b/tests/e2e/multicard/test_offline_inference_distributed.py
index 5cb90bc9..a1e24ecf 100644
--- a/tests/e2e/multicard/test_offline_inference_distributed.py
+++ b/tests/e2e/multicard/test_offline_inference_distributed.py
@@ -28,6 +28,7 @@ from modelscope import snapshot_download  # type: ignore
 from vllm import SamplingParams
 
 from tests.e2e.conftest import VllmRunner
+from tests.e2e.model_utils import check_outputs_equal
 
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
@@ -84,22 +85,6 @@ def test_models_distributed_Qwen3_W4A8DYNAMIC(model):
         vllm_model.generate_greedy(prompts, max_tokens)
 
 
-@pytest.mark.parametrize("model", DEEPSEEK_W4A8_MODELS)
-@patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
-def test_models_distributed_DeepSeek_W4A8DYNAMIC(model):
-    prompts = [
-        "Hello, my name is",
-    ]
-    max_tokens = 5
-    with VllmRunner(snapshot_download(model),
-                    dtype="auto",
-                    tensor_parallel_size=2,
-                    quantization="ascend",
-                    enforce_eager=True,
-                    enable_expert_parallel=True) as vllm_model:
-        vllm_model.generate_greedy(prompts, max_tokens)
-
-
 def test_sp_for_qwen3_moe() -> None:
     example_prompts = [
         "Hello, my name is",
@@ -121,6 +106,38 @@ def test_sp_for_qwen3_moe() -> None:
         vllm_model.generate(example_prompts, sampling_params)
 
 
+@pytest.mark.parametrize("model", DEEPSEEK_W4A8_MODELS)
+@patch.dict(os.environ, {"HCCL_BUFFSIZE": "2048"})
+def test_deepseek_w4a8_accuracy(model):
+    prompts = [
+        "Hello, my name is", "The president of the United States is",
"vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs" + ] + vllm_ds_w4a8_answers = [ + '逍遙而至地去 accrued', '平行于我udo madreHelen', 'ysteepaolis backwards Kj' + ] + sampling_params = SamplingParams(max_tokens=5, temperature=0.0) + with VllmRunner(snapshot_download(model), + dtype="auto", + tensor_parallel_size=2, + quantization="ascend", + enable_expert_parallel=True) as vllm_model: + vllm_quant_outputs = vllm_model.model.generate(prompts, + sampling_params) + + vllm_quant_outputs_list = [] + for output in vllm_quant_outputs: + vllm_quant_outputs_list.append( + ([output.outputs[0].index], output.outputs[0].text)) + vllm_answer_list = [] + vllm_answer_list = ([([0], answer) for answer in vllm_ds_w4a8_answers]) + + check_outputs_equal(outputs_0_lst=vllm_answer_list, + outputs_1_lst=vllm_quant_outputs_list, + name_0="vllm_quant_outputs", + name_1="vllm_answer_outputs") + + @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"}) @patch.dict(os.environ, {"VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE": "1"}) def test_fc2_for_qwen3_moe() -> None: