[CI] Run e2e after pre check pass (#1132)

Make sure the lint checks pass before starting the e2e tests, to save compute resources. Also update the patch doc so that the CI works as expected. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-06-10 17:18:09 +08:00
parent b75cb788dd
commit 95414bae70
6 changed files with 130 additions and 262 deletions
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -1,47 +0,0 @@
-#
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Adapted from vllm-project/vllm/blob/main/.github
-#
-
-name: codespell
-
-on:
-  pull_request:
-    branches:
-      - 'main'
-      - '*-dev'
-
-jobs:
-  codespell:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements-lint.txt
-      - name: Run codespell check
-        run: |
-          CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
-          CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn')
-
-          codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
--- a/.github/workflows/mypy.yaml
+++ b/.github/workflows/mypy.yaml
@@ -1,68 +0,0 @@
-#
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Adapted from vllm-project/vllm/blob/main/.github
-#
-
-name: mypy
-
-on:
-  pull_request:
-    branches:
-      - 'main'
-      - '*-dev'
-    # This workflow is only relevant when one of the following files changes.
-    # However, we have github configured to expect and require this workflow
-    # to run and pass before github with auto-merge a pull request. Until github
-    # allows more flexible auto-merge policy, we can just run this on every PR.
-    # It doesn't take that long to run, anyway.
-    paths:
-     - '**/*.py'
-     - '.github/workflows/mypy.yaml'
-     - 'tools/mypy.sh'
-     - 'mypy.ini'
-
-jobs:
-  mypy:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        # TODO(yikun): Add 3.12 back when torch-npu support 3.12
-        python-version: ["3.9", "3.10", "3.11"]
-    steps:
-    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
-
-    - name: Checkout vllm-project/vllm repo
-      uses: actions/checkout@v4
-      with:
-        repository: vllm-project/vllm
-        path: vllm-empty
-
-    - name: Install vllm-project/vllm from source
-      working-directory: vllm-empty
-      run: |
-        pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
-        VLLM_TARGET_DEVICE=empty pip install .
-
-    - name: Mypy
-      run: |
-        echo "::add-matcher::.github/workflows/matchers/mypy.json"
-        tools/mypy.sh 1 ${{ matrix.python-version }}
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -1,48 +0,0 @@
-#
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Adapted from vllm-project/vllm/blob/main/.github
-#
-
-name: ruff
-
-on:
-  pull_request:
-    branches:
-      - 'main'
-      - '*-dev'
-
-jobs:
-  ruff:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements-lint.txt
-      - name: Analysing the code with ruff
-        run: |
-          echo "::add-matcher::.github/workflows/matchers/ruff.json"
-          ruff check --output-format github .
-      - name: Run isort
-        run: |
-          isort . --check-only
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-name: 'e2e test'
+name: 'test'

 on:
  schedule:
@@ -31,6 +31,9 @@ on:
      - '!docs/**'
      - 'pytest.ini'
      - '!benchmarks/**'
+      - 'tools/mypy.sh'
+      - 'mypy.ini'
+
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
 # It's used to activate ascend-toolkit environment variables.
@@ -39,7 +42,65 @@ defaults:
    shell: bash -el {0}

 jobs:
-  test:
+  lint:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-lint.txt
+      - name: Run codespell check
+        run: |
+          CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
+          CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn')
+
+          codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
+      - name: Analysing the code with ruff
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/ruff.json"
+          ruff check --output-format github .
+      - name: Run isort
+        run: |
+          isort . --check-only
+      - name: Running yapf
+        run: |
+          python -m pip install --upgrade pip
+          pip install toml
+          pip install yapf==0.32.0
+          yapf --diff --recursive .
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: vllm-empty
+        run: |
+          pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          VLLM_TARGET_DEVICE=empty pip install .
+
+      - name: Mypy Check
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/mypy.json"
+          tools/mypy.sh 1 ${{ matrix.python-version }}
+
+  e2e:
+    needs: [lint]
+    if: ${{ needs.lint.result == 'success' }}
    strategy:
      max-parallel: 2
      matrix:
--- a/.github/workflows/yapf.yml
+++ b/.github/workflows/yapf.yml
@@ -1,48 +0,0 @@
-#
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Adapted from vllm-project/vllm/blob/main/.github
-#
-
-name: yapf
-
-on:
-  pull_request:
-    branches:
-      - 'main'
-      - '*-dev'
-    paths:
-      - "**/*.py"
-      - .github/workflows/yapf.yml
-
-jobs:
-  yapf:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.12"]
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install toml
-          pip install yapf==0.32.0
-      - name: Running yapf
-        run: |
-          yapf --diff --recursive .
--- a/vllm_ascend/patch/init.py
+++ b/vllm_ascend/patch/init.py
@@ -35,6 +35,17 @@
 # --------------------------------
 # * Platform Patch:
 # =================
+# ** File: platform/patch_0_9_0/patch_distributed.py**
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#   1. `vllm.distributed.utils.stateless_init_torch_distributed_process_group()`
+#    Why:
+#       vllm distributed uses the gloo backend by default to initialize the stateless process group, but we want to use hccl here
+#    How：
+#       Add hccl backend to the `stateless_init_torch_distributed_process_group`
+#    Related PR (if no, explain why):
+#       https://github.com/vllm-project/vllm/pull/18763
+#    Future Plan:
+#       Remove this patch once vllm is upgraded to 0.9.1
 # ** File: platform/patch_common/patch_distributed.py**
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.distributed.parallel_state.destroy_model_parallel()`
@@ -44,56 +55,40 @@
 #       platform owned `CoordinatorGroup` to make sure all the CoordinateGroup can be properly destroyed
 #    How：
 #       Call `vllm_ascend.distributed.parallel_state method `destroy_platform_model_parallel` to destroy all the `CoordinateGroup`
-#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
+#    Related PR (if no, explain why):
 #    Future Plan:
 #       Remove those patch when vllm merged them
-#   2. `vllm.distributed.stateless_init_torch_distributed_process_group()`
+#   2. `vllm.v1.engine.core.DPEngineCoreProc._init_data_parallel`
 #    Why:
-#       The stateless process group can not be initialized except from gloo and nccl backend, vllm-ascend
-#       needs to initialize its own stateless process group for communication, so we add the platform related
-#       call to the `stateless_init_torch_distributed_process_group`, to enable other platform which may support
-#       stateless process group initialize method
+#       There is a bug in the ASCEND_RT_VISIBLE_DEVICES usage.
 #    How：
-#       rewrite stateless_init_torch_distributed_process_group to judge if there is a stateless process group initialize
-#       method and call platform method `platform_register_backend` to initialize them
-#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
+#       The ASCEND_RT_VISIBLE_DEVICES related code is dropped.
+#    Related PR (if no, explain why):
+#       No, this is a bug in vllm-ascend.
+#    Future Plan:
+#       Remove this patch once ASCEND_RT_VISIBLE_DEVICES bug is fixed.
+#   3. `vllm.config.ParallelConfig.get_next_dp_init_port`
+#    Why:
+#       vllm doesn't support getting the port from the environment.
+#    How：
+#       Add the logic to get the port from the environment.
+#    Related PR (if no, explain why):
+#       Need a PR to vllm to support getting the port from the environment.
 #    Future Plan:
 #       Remove those patch when vllm merged them
-#   3. `ParallelConfig.get_next_dp_init_port`
-#    Why:
-#       We want to get dp port from env variable, so the multi-node inference can be properly initialized and run.
-#    How：
-#       Get the dp port from env variable enable multi-mode dp inference
-#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
-#    Future Plan:
-#       Its a workaround in vllm-ascend to enable multi-node dp inference, maybe removed if vllm have better plan
-#       on multi-node dp inference implementation
-#   4. `ParallelConfig.stateless_init_dp_group`
+#   4. `vllm.config.ParallelConfig.stateless_init_dp_group`
 #    Why:
 #       vLLM use gloo backend by default to initialize stateless dp process gourp, but we want to use hccl here to
 #       get better performance
 #    How：
-#       adopt nccl backend to init process group
-#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
+#       adopt nccl backend to init process group. (Now we still use gloo, it's just a placeholder, we'll use nccl in the future)
+#    Related PR (if no, explain why):
+#       Need a PR to vllm to support more backends.
 #    Future Plan:
-#       Remove those patch when vllm merged them
-#
+#       Remove this patch when vllm supports more backends.
 #
 # * Worker Patch:
 # ===============
-# ** File: worker/patch_common/patch_metrics.py **
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#   1. `vllm.spec_decode.metrics.AsyncMetricsCollector.maybe_collect_rejsample_metrics`
-#    Why:
-#       There are cuda hard code (current_platform.is_cuda_alike()) in
-#       `AsyncMetricsCollector.maybe_collect_rejsample_metrics`
-#    How：
-#       Change to use `current_platform.Event` to determine whether to return None
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
-#       https://github.com/vllm-project/vllm/pull/14411
-#    Future Plan:
-#       Revert it when the related pr is merged in vllm.
-#
 # ** File: worker/patch_common/patch_minicpm.py **
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.model_executor.models.minicpm.MiniCPMAttention.forward`
@@ -103,7 +98,7 @@
 #       However float32 is not supported in cann rope op, thus we keep this patch
 #    How：
 #       Removed the dtype convert operations in forward
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       NO, only for npu due to rope op.
 #    Future Plan:
 #       Keep this patch in vllm-ascend.
@@ -119,7 +114,7 @@
 #       - support attention metadata register to the set supported spec decode
 #       - offer a api in platform to determine whether spec decode is supported,
 #         and deprecate is_cuda_alike in it.
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       - https://github.com/vllm-project/vllm/pull/15195
 #       - https://github.com/vllm-project/vllm-ascend/pull/395
 #    Future Plan:
@@ -131,14 +126,14 @@
 #       vLLM `Remove Sampler from Model Code` so vllm-ascend needs adapt to this change.
 #    How：
 #       Use vLLM 0.8.4 method to patch it.
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       - https://github.com/vllm-project/vllm/pull/15195
 #       - https://github.com/vllm-project/vllm-ascend/pull/395
 #    Future Plan:
 #       Remove it when we identify the reasons clearly.
 #
 # ** File: worker/patch_common/patch_spec_decode_worker.py **
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.spec_decode.spec_decode_worker.SpecDecodeWorker.create_worker`
 #    Why:
 #       We need to use the patched `TP1DraftModelRunner` in `SpecDecodeWorker.create_worker`.
@@ -146,14 +141,14 @@
 #           `FlashAttentionMetadata`
 #    How：
 #       ditto
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       - https://github.com/vllm-project/vllm/pull/15195
 #       - https://github.com/vllm-project/vllm-ascend/pull/395
 #    Future Plan:
 #       Revert it when the related pr is merged in vllm and vllm-ascend.
 #
 # ** File: worker/patch_common/patch_eagle.py **
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.v1.spec_decode.eagle.prepare_inputs`
 #    Why:
 #       We need to use the patched `prepare_input_kernel` in `eagle.prepare_inputs`.
@@ -161,12 +156,12 @@
 #       kernel, ascend is now not support triton kernel.
 #    How：
 #       Re-implementation the `prepare_input_kernel` triton kernel by pytorch
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       - Ascend doesn't support triton
 #    Future Plan:
 #       Revert it when the ascend support triton kernel.
 #
-# ** File: v1/sample/sampler.py **
+# ** File: worker/patch_common/patch_sampler.py **
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.v1.sample.sampler.Sampler.apply_top_k_top_p`
 #    Why:
@@ -175,21 +170,44 @@
 #       to improve performance.
 #    How：
 #       Re-implementation the `apply_top_k_top_p` function by pytorch
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       - https://github.com/vllm-project/vllm-ascend/pull/970
 #    Future Plan:
 #       Revert it when the ascend scatter performance improves.
 #
-# ** File: v1/sample/sampler.py **
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~s
-#   1. `vllm.v1.sample.sampler.Sampler.apply_min_p`
+#   2. `vllm.v1.sample.sampler.Sampler.apply_min_p`
 #    Why:
 #       We need to use the patched `apply_min_p` in `sample`.
 #       The mainly reason to overwrite `apply_min_p` is
 #       to improve performance.
 #    How：
 #       Re-implementation the `apply_min_p` function by pytorch
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
+#    Related PR (if no, explain why):
 #       - https://github.com/vllm-project/vllm-ascend/pull/970
 #    Future Plan:
 #       Revert it when the ascend indexput performance improves.
+#
+# ** File: worker/patch_common/patch_distributed.py **
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#   1. `vllm.distributed.parallel_state.GroupCoordinator`
+#    Why:
+#       vllm doesn't support all_to_all for GroupCoordinator.
+#    How：
+#       Add all_to_all implementation for GroupCoordinator.
+#    Related PR (if no, explain why):
+#       Need a PR to vllm to support all_to_all for GroupCoordinator.
+#    Future Plan:
+#       Remove this patch once vllm merges it.
+#
+# ** File: worker/patch_common/patch_utils.py **
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#   1. `vllm.utils.direct_register_custom_op`
+#    Why:
+#       pytorch 2.7.0 is not compatible with pytorch 2.5.1. vllm is based on pytorch 2.7.0, but vllm-ascend
+#       is based on pytorch 2.5.1, so we need this patch to make vllm compatible with pytorch 2.5.1.
+#    How：
+#       patch __annotations__ check to make it compatible with pytorch 2.5.1.
+#    Related PR (if no, explain why):
+#       This is the problem in vllm-ascend
+#    Future Plan:
+#       Remove this patch once pytorch 2.7.0 is supported for vllm ascend.