[CI] Run e2e after pre check pass (#1132)
Make sure the lint tests pass before starting the e2e test, to save compute resources. Updated the patch doc to make sure the CI works as expected. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
47
.github/workflows/codespell.yml
vendored
47
.github/workflows/codespell.yml
vendored
@@ -1,47 +0,0 @@
|
||||
#
|
||||
# Copyright 2023 The vLLM team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Adapted from vllm-project/vllm/blob/main/.github
|
||||
#
|
||||
|
||||
name: codespell
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
|
||||
jobs:
|
||||
codespell:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-lint.txt
|
||||
- name: Run codespell check
|
||||
run: |
|
||||
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
|
||||
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn')
|
||||
|
||||
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
||||
68
.github/workflows/mypy.yaml
vendored
68
.github/workflows/mypy.yaml
vendored
@@ -1,68 +0,0 @@
|
||||
#
|
||||
# Copyright 2023 The vLLM team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Adapted from vllm-project/vllm/blob/main/.github
|
||||
#
|
||||
|
||||
name: mypy
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
# This workflow is only relevant when one of the following files changes.
|
||||
# However, we have github configured to expect and require this workflow
|
||||
# to run and pass before github with auto-merge a pull request. Until github
|
||||
# allows more flexible auto-merge policy, we can just run this on every PR.
|
||||
# It doesn't take that long to run, anyway.
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- '.github/workflows/mypy.yaml'
|
||||
- 'tools/mypy.sh'
|
||||
- 'mypy.ini'
|
||||
|
||||
jobs:
|
||||
mypy:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
# TODO(yikun): Add 3.12 back when torch-npu support 3.12
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
path: vllm-empty
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: vllm-empty
|
||||
run: |
|
||||
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
VLLM_TARGET_DEVICE=empty pip install .
|
||||
|
||||
- name: Mypy
|
||||
run: |
|
||||
echo "::add-matcher::.github/workflows/matchers/mypy.json"
|
||||
tools/mypy.sh 1 ${{ matrix.python-version }}
|
||||
48
.github/workflows/ruff.yml
vendored
48
.github/workflows/ruff.yml
vendored
@@ -1,48 +0,0 @@
|
||||
#
|
||||
# Copyright 2023 The vLLM team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Adapted from vllm-project/vllm/blob/main/.github
|
||||
#
|
||||
|
||||
name: ruff
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
|
||||
jobs:
|
||||
ruff:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-lint.txt
|
||||
- name: Analysing the code with ruff
|
||||
run: |
|
||||
echo "::add-matcher::.github/workflows/matchers/ruff.json"
|
||||
ruff check --output-format github .
|
||||
- name: Run isort
|
||||
run: |
|
||||
isort . --check-only
|
||||
65
.github/workflows/vllm_ascend_test.yaml
vendored
65
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -15,7 +15,7 @@
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: 'e2e test'
|
||||
name: 'test'
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -31,6 +31,9 @@ on:
|
||||
- '!docs/**'
|
||||
- 'pytest.ini'
|
||||
- '!benchmarks/**'
|
||||
- 'tools/mypy.sh'
|
||||
- 'mypy.ini'
|
||||
|
||||
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
||||
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
||||
# It's used to activate ascend-toolkit environment variables.
|
||||
@@ -39,7 +42,65 @@ defaults:
|
||||
shell: bash -el {0}
|
||||
|
||||
jobs:
|
||||
test:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-lint.txt
|
||||
- name: Run codespell check
|
||||
run: |
|
||||
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
|
||||
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn')
|
||||
|
||||
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
||||
- name: Analysing the code with ruff
|
||||
run: |
|
||||
echo "::add-matcher::.github/workflows/matchers/ruff.json"
|
||||
ruff check --output-format github .
|
||||
- name: Run isort
|
||||
run: |
|
||||
isort . --check-only
|
||||
- name: Running yapf
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install toml
|
||||
pip install yapf==0.32.0
|
||||
yapf --diff --recursive .
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
path: vllm-empty
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: vllm-empty
|
||||
run: |
|
||||
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
VLLM_TARGET_DEVICE=empty pip install .
|
||||
|
||||
- name: Mypy Check
|
||||
run: |
|
||||
echo "::add-matcher::.github/workflows/matchers/mypy.json"
|
||||
tools/mypy.sh 1 ${{ matrix.python-version }}
|
||||
|
||||
e2e:
|
||||
needs: [lint]
|
||||
if: ${{ needs.lint.result == 'success' }}
|
||||
strategy:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
|
||||
48
.github/workflows/yapf.yml
vendored
48
.github/workflows/yapf.yml
vendored
@@ -1,48 +0,0 @@
|
||||
#
|
||||
# Copyright 2023 The vLLM team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Adapted from vllm-project/vllm/blob/main/.github
|
||||
#
|
||||
|
||||
name: yapf
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- "**/*.py"
|
||||
- .github/workflows/yapf.yml
|
||||
|
||||
jobs:
|
||||
yapf:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install toml
|
||||
pip install yapf==0.32.0
|
||||
- name: Running yapf
|
||||
run: |
|
||||
yapf --diff --recursive .
|
||||
@@ -35,6 +35,17 @@
|
||||
# --------------------------------
|
||||
# * Platform Patch:
|
||||
# =================
|
||||
# ** File: platform/patch_0_9_0/patch_distributed.py**
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.distributed.utils.stateless_init_torch_distributed_process_group()`
|
||||
# Why:
|
||||
# vllm distributed use gloo backend by default to initialize stateless process group, but we want to use hccl here
|
||||
# How:
|
||||
# Add hccl backend to the `stateless_init_torch_distributed_process_group`
|
||||
# Related PR (if no, explain why):
|
||||
# https://github.com/vllm-project/vllm/pull/18763
|
||||
# Future Plan:
|
||||
# Remove this patch once vllm is upgraded to 0.9.1
|
||||
# ** File: platform/patch_common/patch_distributed.py**
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.distributed.parallel_state.destroy_model_parallel()`
|
||||
@@ -44,56 +55,40 @@
|
||||
# platform owned `CoordinatorGroup` to make sure all the CoordinateGroup can be properly destroyed
|
||||
# How:
|
||||
# Call the `vllm_ascend.distributed.parallel_state` method `destroy_platform_model_parallel` to destroy all the `CoordinateGroup`
|
||||
# Related PR (if no, explain why): no related PR, we want add this ability into vllm
|
||||
# Related PR (if no, explain why):
|
||||
# Future Plan:
|
||||
# Remove those patch when vllm merged them
|
||||
# 2. `vllm.distributed.stateless_init_torch_distributed_process_group()`
|
||||
# 2. `vllm.v1.engine.core.DPEngineCoreProc._init_data_parallel`
|
||||
# Why:
|
||||
# The stateless process group can not be initialized except from gloo and nccl backend, vllm-ascend
|
||||
# needs to initialize its own stateless process group for communication, so we add the platform related
|
||||
# call to the `stateless_init_torch_distributed_process_group`, to enable other platform which may support
|
||||
# stateless process group initialize method
|
||||
# There is some bug for ASCEND_RT_VISIBLE_DEVICES usage.
|
||||
# How:
|
||||
# rewrite stateless_init_torch_distributed_process_group to judge if there is a stateless process group initialize
|
||||
# method and call platform method `platform_register_backend` to initialize them
|
||||
# Related PR (if no, explain why): no related PR, we want add this ability into vllm
|
||||
# The ASCEND_RT_VISIBLE_DEVICES related code is dropped.
|
||||
# Related PR (if no, explain why):
|
||||
# No, this is a bug for vllm ascend
|
||||
# Future Plan:
|
||||
# Remove this patch once ASCEND_RT_VISIBLE_DEVICES bug is fixed.
|
||||
# 3. `vllm.config.ParallelConfig.get_next_dp_init_port`
|
||||
# Why:
|
||||
# vllm doesn't support get port from environment.
|
||||
# How:
|
||||
# Add the logic to get port from environment.
|
||||
# Related PR (if no, explain why):
|
||||
# Need a PR to vllm to support get port from environment.
|
||||
# Future Plan:
|
||||
# Remove those patch when vllm merged them
|
||||
# 3. `ParallelConfig.get_next_dp_init_port`
|
||||
# Why:
|
||||
# We want to get dp port from env variable, so the multi-node inference can be properly initialized and run.
|
||||
# How:
|
||||
# Get the dp port from env variable enable multi-mode dp inference
|
||||
# Related PR (if no, explain why): no related PR, we want add this ability into vllm
|
||||
# Future Plan:
|
||||
# Its a workaround in vllm-ascend to enable multi-node dp inference, maybe removed if vllm have better plan
|
||||
# on multi-node dp inference implementation
|
||||
# 4. `ParallelConfig.stateless_init_dp_group`
|
||||
# 4. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group`
|
||||
# Why:
|
||||
# vLLM uses gloo backend by default to initialize stateless dp process group, but we want to use hccl here to
|
||||
# get better performance
|
||||
# How:
|
||||
# adopt nccl backend to init process group
|
||||
# Related PR (if no, explain why): no related PR, we want add this ability into vllm
|
||||
# adopt nccl backend to init process group.(Now we still use gloo, it's just a placeholder, we'll use nccl in the future)
|
||||
# Related PR (if no, explain why):
|
||||
# Need a PR to vllm to support more backend.
|
||||
# Future Plan:
|
||||
# Remove those patch when vllm merged them
|
||||
#
|
||||
# Remove those patch when vllm support more backend.
|
||||
#
|
||||
# * Worker Patch:
|
||||
# ===============
|
||||
# ** File: worker/patch_common/patch_metrics.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.spec_decode.metrics.AsyncMetricsCollector.maybe_collect_rejsample_metrics`
|
||||
# Why:
|
||||
# There are cuda hard code (current_platform.is_cuda_alike()) in
|
||||
# `AsyncMetricsCollector.maybe_collect_rejsample_metrics`
|
||||
# How:
|
||||
# Change to use `current_platform.Event` to determine whether to return None
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# https://github.com/vllm-project/vllm/pull/14411
|
||||
# Future Plan:
|
||||
# Revert it when the related pr is merged in vllm.
|
||||
#
|
||||
# ** File: worker/patch_common/patch_minicpm.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.model_executor.models.minicpm.MiniCPMAttention.forward`
|
||||
@@ -103,7 +98,7 @@
|
||||
# However float32 is not supported in cann rope op, thus we keep this patch
|
||||
# How:
|
||||
# Removed the dtype convert operations in forward
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# NO, only for npu due to rope op.
|
||||
# Future Plan:
|
||||
# Keep this patch in vllm-ascend.
|
||||
@@ -119,7 +114,7 @@
|
||||
# - support attention metadata register to the set supported spec decode
|
||||
# - offer a api in platform to determine whether spec decode is supported,
|
||||
# and deprecate is_cuda_alike in it.
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# - https://github.com/vllm-project/vllm/pull/15195
|
||||
# - https://github.com/vllm-project/vllm-ascend/pull/395
|
||||
# Future Plan:
|
||||
@@ -131,14 +126,14 @@
|
||||
# vLLM `Remove Sampler from Model Code` so vllm-ascend needs adapt to this change.
|
||||
# How:
|
||||
# Use vLLM 0.8.4 method to patch it.
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# - https://github.com/vllm-project/vllm/pull/15195
|
||||
# - https://github.com/vllm-project/vllm-ascend/pull/395
|
||||
# Future Plan:
|
||||
# Remove it when we identify the reasons clearly.
|
||||
#
|
||||
# ** File: worker/patch_common/patch_spec_decode_worker.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.spec_decode.spec_decode_worker.SpecDecodeWorker.create_worker`
|
||||
# Why:
|
||||
# We need to use the patched `TP1DraftModelRunner` in `SpecDecodeWorker.create_worker`.
|
||||
@@ -146,14 +141,14 @@
|
||||
# `FlashAttentionMetadata`
|
||||
# How:
|
||||
# ditto
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# - https://github.com/vllm-project/vllm/pull/15195
|
||||
# - https://github.com/vllm-project/vllm-ascend/pull/395
|
||||
# Future Plan:
|
||||
# Revert it when the related pr is merged in vllm and vllm-ascend.
|
||||
#
|
||||
# ** File: worker/patch_common/patch_eagle.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.v1.spec_decode.eagle.prepare_inputs`
|
||||
# Why:
|
||||
# We need to use the patched `prepare_input_kernel` in `eagle.prepare_inputs`.
|
||||
@@ -161,12 +156,12 @@
|
||||
# kernel, ascend is now not support triton kernel.
|
||||
# How:
|
||||
# Re-implement the `prepare_input_kernel` triton kernel in pytorch
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# - Ascend doesn't support triton
|
||||
# Future Plan:
|
||||
# Revert it when ascend supports triton kernels.
|
||||
#
|
||||
# ** File: v1/sample/sampler.py **
|
||||
# ** File: worker/patch_common/patch_sampler.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.v1.sample.sampler.Sampler.apply_top_k_top_p`
|
||||
# Why:
|
||||
@@ -175,21 +170,44 @@
|
||||
# to improve performance.
|
||||
# How:
|
||||
# Re-implement the `apply_top_k_top_p` function in pytorch
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# - https://github.com/vllm-project/vllm-ascend/pull/970
|
||||
# Future Plan:
|
||||
# Revert it when the ascend scatter performance improves.
|
||||
#
|
||||
# ** File: v1/sample/sampler.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~s
|
||||
# 1. `vllm.v1.sample.sampler.Sampler.apply_min_p`
|
||||
# 2. `vllm.v1.sample.sampler.Sampler.apply_min_p`
|
||||
# Why:
|
||||
# We need to use the patched `apply_min_p` in `sample`.
|
||||
# The main reason to overwrite `apply_min_p` is
|
||||
# to improve performance.
|
||||
# How:
|
||||
# Re-implement the `apply_min_p` function in pytorch
|
||||
# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
|
||||
# Related PR (if no, explain why):
|
||||
# - https://github.com/vllm-project/vllm-ascend/pull/970
|
||||
# Future Plan:
|
||||
# Revert it when the ascend indexput performance improves.
|
||||
#
|
||||
# ** File: worker/patch_common/patch_distributed.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.distributed.parallel_state.GroupCoordinator`
|
||||
# Why:
|
||||
# vllm doesn't support all_to_all for GroupCoordinator.
|
||||
# How:
|
||||
# Add all_to_all implementation for GroupCoordinator.
|
||||
# Related PR (if no, explain why):
|
||||
# Need a PR to vllm to support all_to_all for GroupCoordinator.
|
||||
# Future Plan:
|
||||
# Remove this patch when vllm merged them.
|
||||
#
|
||||
# ** File: worker/patch_common/patch_utils.py **
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# 1. `vllm.utils.direct_register_custom_op`
|
||||
# Why:
|
||||
# pytorch 2.7.0 is not compatible with pytorch 2.5.1. vllm is based on pytorch 2.7.0, but vllm ascend
|
||||
# is based on pytorch 2.5.1, so we need to use this patch to make vllm compatible with pytorch 2.5.1.
|
||||
# How:
|
||||
# patch __annotations__ check to make it compatible with pytorch 2.5.1.
|
||||
# Related PR (if no, explain why):
|
||||
# This is the problem in vllm-ascend
|
||||
# Future Plan:
|
||||
# Remove this patch once pytorch 2.7.0 is supported for vllm ascend.
|
||||
|
||||
Reference in New Issue
Block a user