diff --git a/.github/workflows/_pre_commit.yml b/.github/workflows/_pre_commit.yml index dbe30a8e..dc848c2a 100644 --- a/.github/workflows/_pre_commit.yml +++ b/.github/workflows/_pre_commit.yml @@ -12,19 +12,16 @@ permissions: jobs: pre-commit: - runs-on: linux-amd64-cpu-16-hk + runs-on: linux-amd64-cpu-8-hk container: # Build it from https://github.com/nv-action/vllm-benchmarks/blob/main/Dockerfile - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu - env: - GOPROXY: https://goproxy.io,direct - GITHUB_WORKSPACE: /__w/vllm-ascend/vllm-ascend + image: quay.io/ascend-ci/vllm-ascend:lint steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 # With problem matchers in a container, the output of $GITHUB_WORKSPACE and ${{ github.workspace }} are different. - # So we will just copy it into the path ${{ github.workspace }}. see https://github.com/actions/runner/issues/2058 + # So we will just copy it into a temp path. see https://github.com/actions/runner/issues/2058 - name: cp problem matchers run: | cp .github/workflows/matchers/actionlint.json "$RUNNER_TEMP/actionlint.json" @@ -41,14 +38,21 @@ jobs: repository: vllm-project/vllm path: ./vllm-empty ref: ${{ inputs.vllm }} - - name: Install vllm - working-directory: vllm-empty - run: | - VLLM_TARGET_DEVICE=empty python3 -m pip install . 
--extra-index https://download.pytorch.org/whl/cpu/ - - name: Install vllm-ascend dev + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + lint_tracker: + - 'requirements.txt' + - 'requirements-dev.txt' + - 'requirements-lint.txt' + + - name: Install vllm-ascend dev (conditional) + if: steps.filter.outputs.lint_tracker == 'true' run: | git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu + - name: Run pre-commit env: PRE_COMMIT_COLOR: always @@ -56,4 +60,17 @@ jobs: TERM: xterm-256color SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint run: | + git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend pre-commit run --all-files --hook-stage manual --show-diff-on-failure + - name: Run mypy + run: | + PYTHONPATH="$PYTHONPATH:$(pwd)/vllm-empty" + export PYTHONPATH + git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend + # Run mypy for Python 3.10, 3.11, 3.12 manually + # Note: We are now separating mypy from pre-commit hooks for performance reasons. + for python_version in "3.10" "3.11" "3.12"; do + echo "============================" + tools/mypy.sh 1 "$python_version" + echo "============================" + done diff --git a/.github/workflows/dockerfiles/Dockerfile.lint b/.github/workflows/dockerfiles/Dockerfile.lint new file mode 100644 index 00000000..b01e7b7f --- /dev/null +++ b/.github/workflows/dockerfiles/Dockerfile.lint @@ -0,0 +1,46 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+FROM ascendai/python:3.11-ubuntu22.04
+
+ARG TARGETARCH
+
+RUN apt-get update -y && \
+    apt-get install -y curl git gcc g++ cmake libnuma-dev jq && \
+    rm -rf /var/cache/apt/* && \
+    rm -rf /var/lib/apt/lists/*
+
+
+ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
+# For lint purposes, we actually need to make a main-to-main matching.
+ARG VLLM_COMMIT=d68209402ddab3f54a09bc1f4de9a9495a283b60
+RUN git clone $VLLM_REPO /vllm-workspace/vllm && \
+    cd /vllm-workspace/vllm && \
+    git checkout $VLLM_COMMIT
+
+# Install vLLM common dependencies
+RUN python3 -m pip install -r /vllm-workspace/vllm/requirements/common.txt --extra-index-url https://download.pytorch.org/whl/cpu/ && \
+    python3 -m pip uninstall -y triton && \
+    python3 -m pip cache purge
+
+COPY . 
/vllm-workspace/vllm-ascend/
+
+RUN pip install -r /vllm-workspace/vllm-ascend/requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip cache purge && \
+    rm -fr /vllm-workspace/
+
+CMD ["/bin/bash"]
diff --git a/.github/workflows/schedule_lint_image_build.yaml b/.github/workflows/schedule_lint_image_build.yaml
new file mode 100644
index 00000000..3dc86095
--- /dev/null
+++ b/.github/workflows/schedule_lint_image_build.yaml
@@ -0,0 +1,67 @@
+name: 'Image build lint'
+on:
+  schedule:
+    # Runs at 04:00 UTC+8 (20:00 UTC) every day
+    - cron: '0 20 * * *'
+  workflow_dispatch:
+  push:
+    paths:
+      # NOTE: the Dockerfile lives under .github/workflows/dockerfiles/;
+      # a bare 'Dockerfile.lint' filter would never match.
+      - '.github/workflows/dockerfiles/Dockerfile.lint'
+      - 'requirements-lint.txt'
+      - 'requirements-dev.txt'
+      - 'requirements.txt'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  build:
+    name: vllm-ascend lint image build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Print
+        run: |
+          lscpu
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            quay.io/ascend-ci/vllm-ascend
+          tags: lint
+          flavor:
+            latest=false
+
+      - name: Build - Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Build - Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Publish - Login to Quay Container Registry
+        if: ${{ github.repository_owner == 'vllm-project' }}
+        uses: docker/login-action@v3
+        with:
+          registry: quay.io
+          username: ${{ vars.QUAY_CI_USERNAME }}
+          password: ${{ secrets.QUAY_CI_PASSWORD }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          # For now, we only build amd64 lint image
+          platforms: 'linux/amd64'
+          context: .
+ file: .github/workflows/dockerfiles/Dockerfile.lint + push: true + labels: ${{ steps.meta.outputs.labels }} + tags: ${{ steps.meta.outputs.tags }} + provenance: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd2dc626..e18a67be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,40 +38,6 @@ repos: - id: actionlint - repo: local hooks: - # For local development, you can run mypy using tools/mypy.sh script if needed. - # - id: mypy-local - # name: Run mypy for local Python installation - # entry: tools/mypy.sh 0 "local" - # language: system - # types: [python] - # stages: [pre-commit] # Don't run in CI - - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward - name: Run mypy for Python 3.10 - entry: tools/mypy.sh 1 "3.10" - # Use system python because vllm installation is required - language: system - types: [python] - stages: [manual] # Only run in CI - - id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward - name: Run mypy for Python 3.11 - entry: tools/mypy.sh 1 "3.11" - # Use system python because vllm installation is required - language: system - types: [python] - stages: [manual] # Only run in CI - - id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward - name: Run mypy for Python 3.12 - entry: tools/mypy.sh 1 "3.12" - # Use system python because vllm installation is required - language: system - types: [python] - stages: [manual] # Only run in CI - # FIXME: enable shellcheck - # - id: shellcheck - # name: Lint shell scripts - # entry: tools/shellcheck.sh - # language: script - # types: [shell] - id: png-lint name: Lint PNG exports from excalidraw entry: tools/png-lint.sh diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 1d7fd9ec..400d9778 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -453,9 +453,9 @@ class VllmRunner: if images is not 
None and (image := images[i]) is not None: multi_modal_data["image"] = image if videos is not None and (video := videos[i]) is not None: - multi_modal_data["video"] = video + multi_modal_data["video"] = video # type: ignore if audios is not None and (audio := audios[i]) is not None: - multi_modal_data["audio"] = audio + multi_modal_data["audio"] = audio # type: ignore text_prompt_kwargs: dict[str, Any] = { "multi_modal_data": multi_modal_data or None diff --git a/tools/mypy.sh b/tools/mypy.sh index caac0a1d..b8b14b0d 100755 --- a/tools/mypy.sh +++ b/tools/mypy.sh @@ -30,9 +30,13 @@ if [ $PYTHON_VERSION == "local" ]; then PYTHON_VERSION=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') fi +# Define colors +GREEN='\033[0;32m' +NC='\033[0m' # No Color + run_mypy() { - echo "Running mypy on $1" - mypy --follow-imports skip --python-version "${PYTHON_VERSION}" "$@" + echo -e "${GREEN}Running mypy for $1 on python version: ${PYTHON_VERSION}${NC}" + mypy --follow-imports skip --check-untyped-defs --python-version "${PYTHON_VERSION}" "$@" } run_mypy vllm_ascend diff --git a/vllm_ascend/_310p/attention/attention_v1.py b/vllm_ascend/_310p/attention/attention_v1.py index 347c4bc1..9e3ccf2f 100644 --- a/vllm_ascend/_310p/attention/attention_v1.py +++ b/vllm_ascend/_310p/attention/attention_v1.py @@ -15,6 +15,7 @@ # This file is a part of the vllm-ascend project. 
# +from typing import Any import torch import torch_npu @@ -23,7 +24,7 @@ from vllm_ascend._310p.attention.attention_mask import AttentionMaskBuilder, bui from vllm_ascend._310p.attention.metadata_builder import AscendAttentionMetadataBuilder310P from vllm_ascend.attention.attention_v1 import AscendAttentionBackend as _BaseBackend from vllm_ascend.attention.attention_v1 import AscendAttentionBackendImpl as _BaseImpl -from vllm_ascend.attention.attention_v1 import AscendAttentionMetadataBuilder, AscendAttentionState +from vllm_ascend.attention.attention_v1 import AscendAttentionMetadataBuilder, AscendAttentionState, AscendMetadata from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ, aligned_16, nd_to_nz_2d @@ -47,9 +48,17 @@ class AscendAttentionBackend310(_BaseBackend): class AscendAttentionBackendImpl310(_BaseImpl): - def forward_paged_attention(self, query, attn_metadata, output): + def forward_paged_attention( + self, + query: Any, + attn_metadata: AscendMetadata, + output: Any | None = None, + ) -> Any: if attn_metadata.seq_lens.device != query.device: - attn_metadata.seq_lens = attn_metadata.seq_lens.to(device=query.device, non_blocking=True) + attn_metadata.seq_lens = attn_metadata.seq_lens.to( + device=query.device, + non_blocking=True, + ) return super().forward_paged_attention(query, attn_metadata, output) def _forward_prefill_310p_fallback(self, query, key, value, attn_metadata, output): diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py index 933fde28..6c45b6dc 100644 --- a/vllm_ascend/attention/attention_v1.py +++ b/vllm_ascend/attention/attention_v1.py @@ -46,9 +46,7 @@ from vllm_ascend.device.device_op import DeviceOperator from vllm_ascend.ops.flashcomm2_oshard_manager import flashcomm2_oshard_manager from vllm_ascend.utils import vllm_version_is, weak_ref_tensors -# isort: off if vllm_version_is("0.13.0"): - from vllm.v1.attention.backends.utils import AttentionCGSupport, AttentionMetadataBuilder from 
vllm.attention.backends.abstract import ( # type: ignore AttentionBackend, AttentionImpl, @@ -59,20 +57,21 @@ if vllm_version_is("0.13.0"): AttentionBackendEnum, register_backend, ) + from vllm.v1.attention.backends.utils import AttentionCGSupport, AttentionMetadataBuilder else: from vllm.v1.attention.backend import ( # type: ignore AttentionBackend, AttentionCGSupport, AttentionImpl, AttentionLayer, - AttentionType, AttentionMetadataBuilder, + AttentionType, ) from vllm.v1.attention.backends.registry import ( # type: ignore AttentionBackendEnum, register_backend, ) -# isort: on + # default max value of sliding window size SWA_INT_MAX = 2147483647 diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py index a54ef6cb..ee07e5f7 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py @@ -13,7 +13,7 @@ from collections import defaultdict, deque from collections.abc import Iterator from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, List, Optional, OrderedDict, Tuple +from typing import TYPE_CHECKING, Any, List, Optional, OrderedDict, Tuple, TypedDict import msgspec import numpy as np @@ -60,6 +60,11 @@ GET_META_MSG = b"get_meta_msg" DONE_RECVING_MSG = b"done_recving_msg" +class RemotePortInfo(TypedDict): + num: int + host: str + + class MooncakeAgentMetadata(msgspec.Struct, omit_defaults=True, dict=True): engine_id: str te_rpc_port: int @@ -384,7 +389,7 @@ class KVCacheRecvingThread(threading.Thread): remote_handshake_port: int, offset: int, tp_num_need_pulls: int, - remote_port_send_num: dict[int, dict[str, int | str]] = {}, + remote_port_send_num: dict[int, RemotePortInfo] = {}, all_task_done: bool = False): """Add a new request to the queue for processing.""" logger.debug(f"Adding request {request_id} to the 
queue.") @@ -458,8 +463,9 @@ class KVCacheRecvingThread(threading.Thread): self._send_done_signal_to_free_remote_port(remote_request_id, remote_host, remote_port_send_num) - def _send_done_signal_to_free_remote_port(self, request_id, remote_host, - remote_port_send_num): + def _send_done_signal_to_free_remote_port( + self, request_id: str, remote_host: str, + remote_port_send_num: dict[int, RemotePortInfo]): if self.side_channel_port != self.local_handshake_port \ or not remote_port_send_num: return @@ -708,9 +714,10 @@ class KVCacheRecvingThread(threading.Thread): logger.debug("Returned socket to pool for %s:%d", remote_host, remote_handshake_port) - def _send_done_recv_signal(self, request_id: str, remote_host: str, - remote_handshake_port: int, - remote_port_send_num: dict[int, dict[str, int | str]]): + def _send_done_recv_signal( + self, request_id: str, remote_host: str, + remote_handshake_port: int, + remote_port_send_num: dict[int, RemotePortInfo]): logger.debug("Sending done recving signal for request %s to %s:%d", request_id, remote_host, remote_handshake_port) sock: Optional[zmq.Socket] = None # type: ignore @@ -1177,7 +1184,7 @@ class MooncakeConnectorWorker: self.tp_num_need_pulls = num_d_block_heads // num_p_block_heads self.local_remote_block_port_mapping: dict[ str, Optional[List[List[int]]]] = {} - self.remote_port_send_num: dict[str, dict[int, dict[str, int | str]]] = {} + self.remote_port_send_num: dict[str, dict[int, RemotePortInfo]] = {} def _get_prefill_decode_size(self, vllm_config: VllmConfig): # get prefill tp and dp size from extra config @@ -1463,16 +1470,20 @@ class MooncakeConnectorWorker: return local_remote_block_port_mappings - def get_remote_port_send_num(local_remote_block_port_mappings): - remote_port_send_num: dict[int, dict[str, int | str]] = {} + def get_remote_port_send_num( + local_remote_block_port_mappings: dict[int, list[list[int]]] + ) -> dict[int, RemotePortInfo]: + remote_port_send_num: dict[int, RemotePortInfo] = {} for 
port in range(self._prefill_tp_size * meta.remote_pcp_size): - remote_host = meta.remote_multi_nodes_meta_mapping[str(port)]['host'] - remote_port_send_num[meta.remote_port + port] = {} - remote_port_send_num[meta.remote_port + port]['num'] = 0 - remote_port_send_num[meta.remote_port + port]['host'] = remote_host - for local_port in local_remote_block_port_mappings.keys(): - remote_port_head_list = local_remote_block_port_mappings[ - local_port] + remote_host = str(meta.remote_multi_nodes_meta_mapping[str( + port)]['host']) + remote_port_send_num[meta.remote_port + port] = { + 'num': 0, + 'host': remote_host + } + + for remote_port_head_list in local_remote_block_port_mappings.values( + ): for remote_port_list in remote_port_head_list: for remote_port in remote_port_list: remote_port_send_num[remote_port]['num'] += 1 diff --git a/vllm_ascend/xlite/xlite.py b/vllm_ascend/xlite/xlite.py index f3007a6f..aa21a944 100644 --- a/vllm_ascend/xlite/xlite.py +++ b/vllm_ascend/xlite/xlite.py @@ -25,7 +25,7 @@ from vllm.distributed import (get_ep_group, from vllm.forward_context import get_forward_context from vllm.logger import logger from vllm.sequence import IntermediateTensors -from xlite._C import (AttnMHA, Model, ModelAttnMeta, ModelConfig, Runtime, +from xlite._C import (AttnMHA, Model, ModelAttnMeta, ModelConfig, Runtime, # type: ignore[attr-defined] ScoringFuncSoftmax) import vllm_ascend.envs as envs_ascend @@ -214,10 +214,10 @@ class QwenMoeXliteModel(LlamaXliteModel): config.def_dp_size = vllm_config.parallel_config.data_parallel_size config.moe_ep_size = ep_group.world_size if vllm_config.parallel_config.enable_expert_parallel else 1 config.moe_tp_size = 1 if vllm_config.parallel_config.enable_expert_parallel else ep_group.world_size - config.experts_weight_transpose = True + config.experts_weight_transpose = True # type: ignore config.moe_intermediate_size = hf_config.moe_intermediate_size - config.norm_topk_prob = hf_config.norm_topk_prob - config.scoring_func = 
ScoringFuncSoftmax + config.norm_topk_prob = hf_config.norm_topk_prob # type: ignore + config.scoring_func = ScoringFuncSoftmax # type: ignore return config def _build_model(self, runnable: nn.Module, vllm_config: VllmConfig,