diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 9af889eb..cda75bae 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -189,11 +189,8 @@ jobs:
           #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
           # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_new_version
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_old_version
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
@@ -272,7 +269,6 @@ jobs:
           # pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
           # pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
           pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py
-
       - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
        shell: bash -l {0}
        run: |
diff --git a/tests/e2e/multicard/test_aclgraph_capture_replay.py b/tests/e2e/multicard/test_aclgraph_capture_replay.py
index 4375e825..e81b5615 100644
--- a/tests/e2e/multicard/test_aclgraph_capture_replay.py
+++ b/tests/e2e/multicard/test_aclgraph_capture_replay.py
@@ -134,7 +134,7 @@ def _run_worker_process(
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [4, 36])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})
-def test_aclgraph_capture_replay_dp2(
+def test_aclgraph_capture_replay_metrics_dp2(
     model: str,
     max_tokens: int,
     monkeypatch: pytest.MonkeyPatch,
diff --git a/tests/e2e/multicard/test_data_parallel.py b/tests/e2e/multicard/test_data_parallel.py
index e6959b02..cb3c6048 100644
--- a/tests/e2e/multicard/test_data_parallel.py
+++ b/tests/e2e/multicard/test_data_parallel.py
@@ -38,7 +38,7 @@ MODELS = [
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})
-def test_data_parallel_inference(model, max_tokens):
+def test_qwen_inference_dp2(model, max_tokens):
     moe_models = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/Qwen3-30B-A3B-W8A8"]
     quantization_models = ["vllm-ascend/Qwen3-30B-A3B-W8A8"]
     script = "examples/offline_data_parallel.py"
diff --git a/tests/e2e/multicard/test_data_parallel_tp2.py b/tests/e2e/multicard/test_data_parallel_tp2.py
index 6b0bdabe..202eaa9c 100644
--- a/tests/e2e/multicard/test_data_parallel_tp2.py
+++ b/tests/e2e/multicard/test_data_parallel_tp2.py
@@ -15,7 +15,7 @@ MODELS = ["Qwen/Qwen3-0.6B"]
 
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3"})
-def test_data_parallel_inference(model, max_tokens):
+def test_qwen_inference_dp2_tp2(model, max_tokens):
     script = "examples/offline_data_parallel.py"
     env = os.environ.copy()
diff --git a/tests/e2e/multicard/test_expert_parallel.py b/tests/e2e/multicard/test_expert_parallel.py
index b8f03d5f..762ca6d2 100644
--- a/tests/e2e/multicard/test_expert_parallel.py
+++ b/tests/e2e/multicard/test_expert_parallel.py
@@ -5,7 +5,7 @@ from tests.e2e.model_utils import check_outputs_equal
 
 
 @pytest.mark.parametrize("model_name", ["deepseek-ai/DeepSeek-V2-Lite-Chat"])
-def test_e2e_ep_correctness(model_name):
+def test_deepseek_correctness_ep(model_name):
     example_prompts = [
         "Hello, my name is",
         "The president of the United States is",
diff --git a/tests/e2e/multicard/test_external_launcher.py b/tests/e2e/multicard/test_external_launcher.py
index ece35def..4a4a17ec 100644
--- a/tests/e2e/multicard/test_external_launcher.py
+++ b/tests/e2e/multicard/test_external_launcher.py
@@ -37,7 +37,7 @@ DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
 @pytest.mark.parametrize("model", MODELS)
 @patch.dict(os.environ, {"HCCL_BUFFSIZE": "500"})
-def test_external_launcher(model):
+def test_qwen_external_launcher(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -78,7 +78,7 @@ def test_external_launcher(model):
 
 
 @pytest.mark.parametrize("model", MOE_MODELS)
-def test_moe_external_launcher(model):
+def test_qwen_moe_external_launcher_ep(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -109,7 +109,7 @@ def test_moe_external_launcher(model):
 
 
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
-def test_external_launcher_and_sleepmode():
+def test_qwen_external_launcher_with_sleepmode():
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -154,7 +154,7 @@ def test_external_launcher_and_sleepmode():
 
 
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
-def test_external_launcher_and_sleepmode_level2():
+def test_qwen_external_launcher_with_sleepmode_level2():
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
@@ -210,7 +210,7 @@ def test_external_launcher_and_sleepmode_level2():
         "VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE": "1",
         "HCCL_BUFFSIZE": "500"
     })
-def test_mm_allreduce(model):
+def test_qwen_external_launcher_with_matmul_allreduce(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
diff --git a/tests/e2e/multicard/test_full_graph_mode.py b/tests/e2e/multicard/test_full_graph_mode.py
index 3ccbf823..c788e9da 100644
--- a/tests/e2e/multicard/test_full_graph_mode.py
+++ b/tests/e2e/multicard/test_full_graph_mode.py
@@ -29,7 +29,7 @@ from tests.e2e.conftest import VllmRunner
 from tests.e2e.model_utils import check_outputs_equal
 
 
-def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL_DECODE_ONLY():
+def test_qwen_moe_with_full_decode_only():
     if 'HCCL_OP_EXPANSION_MODE' in os.environ:
         del os.environ['HCCL_OP_EXPANSION_MODE']
     prompts = [
@@ -75,7 +75,7 @@ def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL_DECODE_ONLY():
     )
 
 
-def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL():
+def test_qwen_moe_with_full():
     if 'HCCL_OP_EXPANSION_MODE' in os.environ:
         del os.environ['HCCL_OP_EXPANSION_MODE']
     prompts = [
diff --git a/tests/e2e/multicard/test_fused_moe_allgather_ep.py b/tests/e2e/multicard/test_fused_moe_allgather_ep.py
index 85d246e5..4fa111ce 100644
--- a/tests/e2e/multicard/test_fused_moe_allgather_ep.py
+++ b/tests/e2e/multicard/test_fused_moe_allgather_ep.py
@@ -41,7 +41,7 @@ from tests.e2e.conftest import VllmRunner
         "TASK_QUEUE_ENABLE": "1",
         "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP": "1"
     })
-def test_generate_with_allgather():
+def test_deepseek_moe_fused_allgather_ep():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 
@@ -62,7 +62,7 @@ def test_generate_with_allgather():
         "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
         "TASK_QUEUE_ENABLE": "1"
     })
-def test_generate_with_alltoall():
+def test_deepseek_moe_fused_alltoall_ep():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 
diff --git a/tests/e2e/multicard/test_offline_inference_distributed.py b/tests/e2e/multicard/test_offline_inference_distributed.py
index a13276bb..f4cf5a25 100644
--- a/tests/e2e/multicard/test_offline_inference_distributed.py
+++ b/tests/e2e/multicard/test_offline_inference_distributed.py
@@ -33,20 +33,15 @@ os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 
 QWEN_DENSE_MODELS = [
-    "vllm-ascend/Qwen3-8B-W8A8", "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8"
+    "vllm-ascend/Qwen3-8B-W8A8",
 ]
 
-QWEN_W4A8_OLD_VERSION_MODELS = [
-    "vllm-ascend/Qwen3-8B-W4A8",
-]
-
-QWEN_W4A8_NEW_VERSION_MODELS = [
+QWEN_W4A8_MODELS = [
     "vllm-ascend/Qwen3-1.7B-W4A8-V1",
 ]
 
 DEEPSEEK_W4A8_MODELS = [
-    "vllm-ascend/DeepSeek-V3-W4A8-Pruing",
-    "vllm-ascend/DeepSeek-V3.1-W4A8-puring"
+    "vllm-ascend/DeepSeek-V3.1-W4A8-puring",
 ]
 
 KIMI_W4A16_MODELS = [
@@ -54,22 +49,6 @@ KIMI_W4A16_MODELS = [
 ]
-
-def test_models_distributed_QwQ():
-    example_prompts = [
-        "Hello, my name is",
-    ]
-    dtype = "half"
-    max_tokens = 5
-    with VllmRunner(
-            "Qwen/QwQ-32B",
-            dtype=dtype,
-            tensor_parallel_size=2,
-            distributed_executor_backend="mp",
-            enforce_eager=False,
-    ) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
-
-
 def test_models_distributed_DeepSeek_multistream_moe():
     example_prompts = [
         "Hello, my name is",
@@ -89,40 +68,8 @@ def test_models_distributed_DeepSeek_multistream_moe():
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
-def test_models_distributed_Qwen3_W8A8():
-    example_prompts = [
-        "Hello, my name is",
-    ]
-    max_tokens = 5
-
-    with VllmRunner(
-            snapshot_download("vllm-ascend/Qwen3-8B-W8A8"),
-            max_model_len=8192,
-            dtype="auto",
-            tensor_parallel_size=2,
-            quantization="ascend",
-    ) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
-
-
-@pytest.mark.parametrize("model", QWEN_W4A8_OLD_VERSION_MODELS)
-def test_models_distributed_Qwen3_W4A8DYNAMIC_old_version(model):
-    prompts = [
-        "Hello, my name is",
-    ]
-    max_tokens = 5
-    with VllmRunner(
-            snapshot_download(model),
-            max_model_len=8192,
-            dtype="auto",
-            tensor_parallel_size=2,
-            quantization="ascend",
-    ) as vllm_model:
-        vllm_model.generate_greedy(prompts, max_tokens)
-
-
-@pytest.mark.parametrize("model", QWEN_W4A8_NEW_VERSION_MODELS)
-def test_models_distributed_Qwen3_W4A8DYNAMIC_new_version(model):
+@pytest.mark.parametrize("model", QWEN_W4A8_MODELS)
+def test_models_distributed_Qwen3_W4A8DYNAMIC(model):
     prompts = [
         "Hello, my name is",
     ]
diff --git a/tests/e2e/multicard/test_offline_weight_load.py b/tests/e2e/multicard/test_offline_weight_load.py
index 0e9ba95c..dd0ac01f 100644
--- a/tests/e2e/multicard/test_offline_weight_load.py
+++ b/tests/e2e/multicard/test_offline_weight_load.py
@@ -31,7 +31,7 @@ MODELS = ["Qwen/Qwen3-30B-A3B"]
 
 @pytest.mark.parametrize("model", MODELS)
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
-def test_offline_weight_load_and_sleepmode(model):
+def test_qwen_offline_weight_load_and_sleepmode(model):
     script = Path(
         __file__
     ).parent.parent.parent.parent / "examples" / "offline_external_launcher.py"
diff --git a/tests/e2e/multicard/test_pipeline_parallel.py b/tests/e2e/multicard/test_pipeline_parallel.py
index fa21fe8d..855724ea 100644
--- a/tests/e2e/multicard/test_pipeline_parallel.py
+++ b/tests/e2e/multicard/test_pipeline_parallel.py
@@ -1,47 +1,47 @@
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-#
-import pytest
-
-from tests.e2e.conftest import VllmRunner
-
-MODELS = [
-    "Qwen/Qwen3-0.6B",
-    "deepseek-ai/DeepSeek-V2-Lite-Chat",
-]
-
-TENSOR_PARALLELS = [1]
-PIPELINE_PARALLELS = [2]
-DIST_EXECUTOR_BACKEND = ["mp", "ray"]
-
-prompts = [
-    "Hello, my name is",
-    "The future of AI is",
-]
-
-
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
-@pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
-@pytest.mark.parametrize("distributed_executor_backend", DIST_EXECUTOR_BACKEND)
-def test_models(model: str, tp_size: int, pp_size: int,
-                distributed_executor_backend: str) -> None:
-    with VllmRunner(model,
-                    tensor_parallel_size=tp_size,
-                    pipeline_parallel_size=pp_size,
-                    distributed_executor_backend=distributed_executor_backend,
-                    gpu_memory_utilization=0.7) as vllm_model:
-        vllm_model.generate_greedy(prompts, 64)
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+import pytest
+
+from tests.e2e.conftest import VllmRunner
+
+MODELS = [
+    "Qwen/Qwen3-0.6B",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
+]
+
+TENSOR_PARALLELS = [1]
+PIPELINE_PARALLELS = [2]
+DIST_EXECUTOR_BACKEND = ["mp", "ray"]
+
+prompts = [
+    "Hello, my name is",
+    "The future of AI is",
+]
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
+@pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
+@pytest.mark.parametrize("distributed_executor_backend", DIST_EXECUTOR_BACKEND)
+def test_models_pp2(model: str, tp_size: int, pp_size: int,
+                    distributed_executor_backend: str) -> None:
+    with VllmRunner(model,
+                    tensor_parallel_size=tp_size,
+                    pipeline_parallel_size=pp_size,
+                    distributed_executor_backend=distributed_executor_backend,
+                    gpu_memory_utilization=0.7) as vllm_model:
+        vllm_model.generate_greedy(prompts, 64)