diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 84117eda..ed744973 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -15,6 +15,9 @@ on:
       type:
         required: true
         type: string
+      contains_310:
+        required: true
+        type: boolean
 
 jobs:
   e2e:
@@ -328,3 +331,119 @@ jobs:
 
           # spec_decode
           pytest -sv --durations=0 tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
+
+  e2e_310p:
+    name: 310p singlecard
+    runs-on: linux-aarch64-310p-1
+    if: ${{ inputs.contains_310 }}
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+        TRANSFORMERS_OFFLINE: 1
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v6
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v6
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test
+        env:
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+        run: |
+          pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_310p.py
+
+  e2e_310p-4cards:
+    name: 310p multicards 4cards
+    runs-on: linux-aarch64-310p-4
+    if: ${{ inputs.contains_310 }}
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+        TRANSFORMERS_OFFLINE: 1
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v6
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v6
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test
+        env:
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+        run: |
+          pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_parallel_310p.py
diff --git a/.github/workflows/labled_test_310.yaml b/.github/workflows/labled_test_310.yaml
deleted file mode 100644
index acd06346..00000000
--- a/.github/workflows/labled_test_310.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-#
-
-name: 310p Labeled Test
-
-on:
-  pull_request:
-    types: [ labeled ]
-
-# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
-# declared as "shell: bash -el {0}" on steps that need to be properly activated.
-# It's used to activate ascend-toolkit environment variables.
-defaults:
-  run:
-    shell: bash -el {0}
-
-# only cancel in-progress runs of the same workflow
-# and ignore the lint / 1 card / 4 cards test type
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  e2e:
-    # e2e-310p-test will be triggered when tag 'e2e-310p-test' & 'ready-for-test' or schedule job
-    if: >-
-      ${{
-      (contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) &&
-      contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
-      github.event_name == 'schedule' || github.event_name == 'push'
-      }}
-    strategy:
-      max-parallel: 2
-      matrix:
-        os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.11.0]
-    name: 310p e2e test
-    runs-on: ${{ matrix.os }}
-    container:
-      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
-          apt-get update -y
-          apt install git -y
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v6
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        run: |
-          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run e2e test
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
-        run: |
-          if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
-            pytest -sv tests/e2e/310p/test_offline_inference_310p.py
-          else
-            pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
-          fi
diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml
index 9d26c2ac..16461015 100644
--- a/.github/workflows/pr_test_full.yaml
+++ b/.github/workflows/pr_test_full.yaml
@@ -83,4 +83,5 @@ jobs:
       vllm: ${{ matrix.vllm_version }}
       runner: linux-aarch64-a2
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      contains_310: true
       type: full
diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml
index 405d1ba6..3180f561 100644
--- a/.github/workflows/pr_test_light.yaml
+++ b/.github/workflows/pr_test_light.yaml
@@ -47,6 +47,7 @@ jobs:
     outputs:
       e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
       ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
+      _310_tracker: ${{ steps.filter.outputs._310_tracker }}
     steps:
       - name: Setup git proxy
        run: |
@@ -73,6 +74,8 @@ jobs:
            ut_tracker:
              - 'tests/ut/**'
              - '.github/workflows/pr_test_light.yaml'
+            _310_tracker:
+              - 'vllm_ascend/_310p/**'
 
   ut:
     needs: [lint, changes]
@@ -103,4 +106,5 @@ jobs:
       vllm: ${{ matrix.vllm_version }}
       runner: linux-aarch64-a2
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }}
       type: light
diff --git a/.github/workflows/schedule_test_vllm_main.yaml b/.github/workflows/schedule_test_vllm_main.yaml
index 7f8c7876..c233d25d 100644
--- a/.github/workflows/schedule_test_vllm_main.yaml
+++ b/.github/workflows/schedule_test_vllm_main.yaml
@@ -36,4 +36,5 @@ jobs:
       vllm: main
       runner: linux-aarch64-a2
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      contains_310: false
       type: full
diff --git a/tests/e2e/310p/test_offline_inference_310p.py b/tests/e2e/310p/test_offline_inference_310p.py
index 31f7eb92..188865f6 100644
--- a/tests/e2e/310p/test_offline_inference_310p.py
+++ b/tests/e2e/310p/test_offline_inference_310p.py
@@ -15,58 +15,61 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 
 import pytest
-import vllm  # noqa: F401
-from vllm import SamplingParams
+from vllm.assets.image import ImageAsset
 
-import vllm_ascend  # noqa: F401
 from tests.e2e.conftest import VllmRunner
 
-MODELS = ["Qwen/Qwen3-0.6B", "Qwen/Qwen2.5-7B-Instruct"]
-
 
-@pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["float16"])
 @pytest.mark.parametrize("max_tokens", [5])
-def test_models(model: str, dtype: str, max_tokens: int) -> None:
+def test_llm_models(dtype: str, max_tokens: int) -> None:
     example_prompts = [
         "Hello, my name is",
         "The future of AI is",
     ]
 
-    with VllmRunner(model,
+    with VllmRunner("Qwen/Qwen3-0.6B",
                     tensor_parallel_size=1,
                     dtype=dtype,
                     max_model_len=2048,
-                    enforce_eager=True,
-                    compilation_config={
-                        "custom_ops":
-                        ["none", "+rms_norm", "+rotary_embedding"]
-                    }) as vllm_model:
+                    enforce_eager=True) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
-VL_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
+def test_multimodal_vl():
+    image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
 
-
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["float16"])
-def test_vl_model_with_samples(model: str, dtype: str) -> None:
-    example_prompts = [
-        "Hello, my name is",
-        "The future of AI is",
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
     ]
 
-    with VllmRunner(model,
-                    tensor_parallel_size=1,
-                    dtype=dtype,
-                    max_model_len=2048,
+    images = [image] * len(img_questions)
+    placeholder = "<|image_pad|>"
+    prompts = [
+        ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+         f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+         f"{q}<|im_end|>\n<|im_start|>assistant\n") for q in img_questions
+    ]
+
+    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
+                    mm_processor_kwargs={
+                        "min_pixels": 28 * 28,
+                        "max_pixels": 1280 * 28 * 28,
+                        "fps": 1,
+                    },
+                    max_model_len=8192,
                     enforce_eager=True,
-                    compilation_config={
-                        "custom_ops":
-                        ["none", "+rms_norm", "+rotary_embedding"]
-                    }) as vllm_model:
-        sampling_params = SamplingParams(max_tokens=100,
-                                         top_p=0.95,
-                                         top_k=50,
-                                         temperature=0.6)
-        vllm_model.generate(example_prompts, sampling_params)
+                    limit_mm_per_prompt={"image": 1}) as vllm_model:
+        outputs = vllm_model.generate_greedy(
+            prompts=prompts,
+            images=images,
+            max_tokens=64,
+        )
+
+        assert len(outputs) == len(prompts)
+
+        for _, output_str in outputs:
+            assert output_str, "Generated output should not be empty."
diff --git a/tests/e2e/310p/test_offline_inference_parallel_310p.py b/tests/e2e/310p/test_offline_inference_parallel_310p.py
new file mode 100644
index 00000000..c6467d60
--- /dev/null
+++ b/tests/e2e/310p/test_offline_inference_parallel_310p.py
@@ -0,0 +1,20 @@
+import pytest
+
+from tests.e2e.conftest import VllmRunner
+
+
+@pytest.mark.parametrize("dtype", ["float16"])
+@pytest.mark.parametrize("max_tokens", [5])
+@pytest.mark.skip(reason="310p does not support parallel inference now. Fix me")
+def test_models(dtype: str, max_tokens: int) -> None:
+    example_prompts = [
+        "Hello, my name is",
+        "The future of AI is",
+    ]
+
+    with VllmRunner("Qwen/Qwen3-0.6B",
+                    tensor_parallel_size=4,
+                    dtype=dtype,
+                    max_model_len=2048,
+                    enforce_eager=True) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)