diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 758b35ef..75849a56 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -403,7 +403,7 @@ jobs:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
-          pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_310p.py
+          pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py
 
   e2e_310p-4cards:
     name: 310p multicards 4cards
@@ -462,5 +462,5 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
           pytest -sv --durations=0 \
-          tests/e2e/310p/test_offline_inference_parallel_310p.py \
-          tests/e2e/310p/test_offline_inference_w8a8_310p.py
+          tests/e2e/310p/multicard/test_dense_model_multicard.py \
+          tests/e2e/310p/multicard/test_moe_model_multicard.py
diff --git a/tests/e2e/310p/multicard/test_dense_model_multicard.py b/tests/e2e/310p/multicard/test_dense_model_multicard.py
new file mode 100644
index 00000000..e964c48f
--- /dev/null
+++ b/tests/e2e/310p/multicard/test_dense_model_multicard.py
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+
+from tests.e2e.conftest import VllmRunner
+
+
+def test_qwen3_dense_tp2_fp16():
+    """Smoke-test greedy decoding of Qwen3-8B in float16 with TP=2."""
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+    with VllmRunner(
+        "Qwen/Qwen3-8B",
+        tensor_parallel_size=2,
+        enforce_eager=True,
+        dtype="float16",
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # Expect exactly one result per prompt so dropped requests fail the test.
+        assert len(outputs) == len(example_prompts)
+
+
+def test_qwen3_dense_tp4_w8a8():
+    """Smoke-test greedy decoding of W8A8-quantized Qwen3-32B with TP=4."""
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+    with VllmRunner(
+        "vllm-ascend/Qwen3-32B-W8A8",
+        tensor_parallel_size=4,
+        enforce_eager=True,
+        dtype="float16",
+        quantization="ascend",
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # Expect exactly one result per prompt so dropped requests fail the test.
+        assert len(outputs) == len(example_prompts)
diff --git a/tests/e2e/310p/test_offline_inference_parallel_310p.py b/tests/e2e/310p/multicard/test_moe_model_multicard.py
similarity index 52%
rename from tests/e2e/310p/test_offline_inference_parallel_310p.py
rename to tests/e2e/310p/multicard/test_moe_model_multicard.py
index 2a796ad5..40dabf25 100644
--- a/tests/e2e/310p/test_offline_inference_parallel_310p.py
+++ b/tests/e2e/310p/multicard/test_moe_model_multicard.py
@@ -1,36 +1,53 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-
-import pytest
-
-from tests.e2e.conftest import VllmRunner
-
-
-@pytest.mark.parametrize("dtype", ["float16"])
-@pytest.mark.parametrize("max_tokens", [5])
-def test_models(dtype: str, max_tokens: int) -> None:
-    example_prompts = [
-        "Hello, my name is",
-        "The future of AI is",
-    ]
-
-    with VllmRunner("Qwen/Qwen3-0.6B",
-                    tensor_parallel_size=4,
-                    dtype=dtype,
-                    max_model_len=2048,
-                    enforce_eager=True) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+
+from tests.e2e.conftest import VllmRunner
+
+
+def test_qwen3_moe_tp4_fp16():
+    """Smoke-test greedy decoding of Qwen3-30B-A3B in float16 with TP=4."""
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+    with VllmRunner(
+        "Qwen/Qwen3-30B-A3B",
+        tensor_parallel_size=4,
+        enforce_eager=True,
+        dtype="float16",
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # Expect exactly one result per prompt so dropped requests fail the test.
+        assert len(outputs) == len(example_prompts)
+
+
+def test_qwen3_moe_ep4_fp16():
+    """Smoke-test Qwen3-30B-A3B in float16 with TP=4 and expert parallelism."""
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+    with VllmRunner(
+        "Qwen/Qwen3-30B-A3B",
+        tensor_parallel_size=4,
+        enforce_eager=True,
+        dtype="float16",
+        enable_expert_parallel=True,
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # Expect exactly one result per prompt so dropped requests fail the test.
+        assert len(outputs) == len(example_prompts)
diff --git a/tests/e2e/310p/singlecard/test_dense_model_singlecard.py b/tests/e2e/310p/singlecard/test_dense_model_singlecard.py
new file mode 100644
index 00000000..a557f577
--- /dev/null
+++ b/tests/e2e/310p/singlecard/test_dense_model_singlecard.py
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+
+from tests.e2e.conftest import VllmRunner
+
+
+def test_qwen3_dense_tp1_fp16():
+    """Smoke-test greedy decoding of Qwen3-8B in float16 on a single card."""
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+    with VllmRunner(
+        "Qwen/Qwen3-8B",
+        tensor_parallel_size=1,
+        enforce_eager=True,
+        dtype="float16",
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # Expect exactly one result per prompt so dropped requests fail the test.
+        assert len(outputs) == len(example_prompts)
+
+
+def test_qwen3_dense_tp1_w8a8():
+    """Smoke-test greedy decoding of W8A8-quantized Qwen3-8B on a single card."""
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+    with VllmRunner(
+        "vllm-ascend/Qwen3-8B-W8A8",
+        tensor_parallel_size=1,
+        enforce_eager=True,
+        dtype="float16",
+        quantization="ascend",
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # Expect exactly one result per prompt so dropped requests fail the test.
+        assert len(outputs) == len(example_prompts)
diff --git a/tests/e2e/310p/test_offline_inference_310p.py b/tests/e2e/310p/test_offline_inference_310p.py
deleted file mode 100644
index e62b8026..00000000
--- a/tests/e2e/310p/test_offline_inference_310p.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-import pytest
-from vllm.assets.image import ImageAsset
-
-from tests.e2e.conftest import VllmRunner
-
-
-@pytest.mark.parametrize("dtype", ["float16"])
-@pytest.mark.parametrize("max_tokens", [5])
-def test_llm_models(dtype: str, max_tokens: int) -> None:
-    example_prompts = [
-        "Hello, my name is",
-        "The future of AI is",
-    ]
-
-    with VllmRunner("Qwen/Qwen3-0.6B",
-                    tensor_parallel_size=1,
-                    dtype=dtype,
-                    max_model_len=2048,
-                    enforce_eager=True) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
-
-
-@pytest.mark.skip(reason="310P: multimodal test skipped, offline is ok")
-@pytest.mark.parametrize("dtype", ["float16"])
-def test_multimodal_vl(dtype: str):
-    image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
-
-    img_questions = [
-        "What is the content of this image?",
-        "Describe the content of this image in detail.",
-        "What's in the image?",
-        "Where is this image taken?",
-    ]
-
-    images = [image] * len(img_questions)
-    placeholder = "<|image_pad|>"
-    prompts = [
-        ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-         f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
-         f"{q}<|im_end|>\n<|im_start|>assistant\n") for q in img_questions
-    ]
-
-    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
-                    mm_processor_kwargs={
-                        "min_pixels": 28 * 28,
-                        "max_pixels": 1280 * 28 * 28,
-                        "fps": 1,
-                    },
-                    dtype=dtype,
-                    max_model_len=8192,
-                    enforce_eager=True,
-                    limit_mm_per_prompt={"image": 1}) as vllm_model:
-        outputs = vllm_model.generate_greedy(
-            prompts=prompts,
-            images=images,
-            max_tokens=64,
-        )
-
-        assert len(outputs) == len(prompts)
-
-        for _, output_str in outputs:
-            assert output_str, "Generated output should not be empty."
diff --git a/tests/e2e/310p/test_offline_inference_w8a8_310p.py b/tests/e2e/310p/test_offline_inference_w8a8_310p.py
deleted file mode 100644
index 84b3eb49..00000000
--- a/tests/e2e/310p/test_offline_inference_w8a8_310p.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import pytest
-
-from tests.e2e.conftest import VllmRunner
-
-
-@pytest.mark.parametrize("dtype", ["float16"])
-@pytest.mark.parametrize("max_tokens", [5])
-def test_qwen3_w8a8_e2e_310p(dtype: str, max_tokens: int) -> None:
-    example_prompts = [
-        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
-    ]
-
-    with VllmRunner(
-        "vllm-ascend/Qwen3-32B-W8A8",
-        tensor_parallel_size=4,
-        dtype=dtype,
-        max_model_len=8192,
-        enforce_eager=True,
-        quantization="ascend",
-        enable_prefix_caching=False,
-    ) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
diff --git a/vllm_ascend/_310p/fused_moe/fused_moe.py b/vllm_ascend/_310p/fused_moe/fused_moe.py
index 5cca5036..f54ac604 100644
--- a/vllm_ascend/_310p/fused_moe/fused_moe.py
+++ b/vllm_ascend/_310p/fused_moe/fused_moe.py
@@ -251,9 +251,11 @@ class AscendSharedFusedMoE310(SharedFusedMoE, AscendFusedMoE310):
         shared_experts: torch.nn.Module,
         gate: torch.nn.Module | None = None,
         use_overlapped: bool = True,
+        routed_input_transform: torch.nn.Module | None = None,
         **kwargs,
     ):
         AscendFusedMoE310.__init__(self, **kwargs)
+        self._routed_input_transform = routed_input_transform
         self._shared_experts = shared_experts
         self.use_overlapped = use_overlapped
         self.shared_expert_stream = None
diff --git a/vllm_ascend/_310p/worker_310p.py b/vllm_ascend/_310p/worker_310p.py
index 8ced752b..bb0fa28d 100644
--- a/vllm_ascend/_310p/worker_310p.py
+++ b/vllm_ascend/_310p/worker_310p.py
@@ -25,9 +25,7 @@ from vllm_ascend.worker.worker import NPUWorker, init_workspace_manager
 class NPUWorker310(NPUWorker):
     def init_device(self):
         self.device = self._init_device()
-
-        # TODO: There is accuracy issue when jit_compile is disabled currently.
-        torch_npu.npu.set_compile_mode(jit_compile=True)
+        torch_npu.npu.set_compile_mode(jit_compile=False)
 
         init_workspace_manager(self.device, num_ubatches=1)