[1/N][CI] Move linting system to pre-commit hooks (#1256)
### What this PR does / why we need it?
Follow the linting setup used by vllm-project/vllm:
https://github.com/vllm-project/vllm/blob/main/.pre-commit-config.yaml
Enable pre-commit to catch as many low-level errors as possible.
This PR is one step of #1241. The purpose is to make the linting system
clearer and more convenient. This step mainly enables the following hooks:
yapf, actionlint, ruff, typos, isort, mypy, png-lint, signoff-commit,
enforce-import-regex-instead-of-re.
TODO:
- clang-format (check csrc against Google style)
needs code cleanup first; disabled for now
- pymarkdown
needs code cleanup first; disabled for now
- shellcheck
needs code cleanup first; disabled for now
### Does this PR introduce _any_ user-facing change?
Only developer UX change:
https://vllm-ascend--1256.org.readthedocs.build/en/1256/developer_guide/contributing.html#run-lint-locally
```
pip install -r requirements-lint.txt && pre-commit install
bash format.sh
```
### How was this patch tested?
CI passed with newly added and existing tests.
Co-authored-by: Yikun <yikunkero@gmail.com>
Co-authored-by: wangli <wangli858794774@gmail.com>
- vLLM version: v0.9.1
- vLLM main:
5358cce5ff
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -95,6 +95,6 @@ body:
|
|||||||
|
|
||||||
- [ ] Upload 310p wheel to Github release page
|
- [ ] Upload 310p wheel to Github release page
|
||||||
|
|
||||||
- [ ] Brodcast the release news (By message, blog , etc)
|
- [ ] Broadcast the release news (By message, blog , etc)
|
||||||
|
|
||||||
- [ ] Close this issue
|
- [ ] Close this issue
|
||||||
|
|||||||
33
.github/workflows/doc_codespell.yaml
vendored
33
.github/workflows/doc_codespell.yaml
vendored
@@ -1,33 +0,0 @@
|
|||||||
|
|
||||||
name: 'doc-codespell'
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- 'main'
|
|
||||||
- '*-dev'
|
|
||||||
paths:
|
|
||||||
- 'docs/**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
codespell:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-version: ["3.10"]
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install -r requirements-lint.txt
|
|
||||||
- name: Run codespell check
|
|
||||||
run: |
|
|
||||||
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
|
|
||||||
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever')
|
|
||||||
|
|
||||||
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
|
||||||
37
.github/workflows/pre-commit.yml
vendored
Normal file
37
.github/workflows/pre-commit.yml
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
name: pre-commit
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_call:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
pre-commit:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout vllm-project/vllm-ascend repo
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
|
||||||
|
with:
|
||||||
|
python-version: "3.10"
|
||||||
|
- run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
|
||||||
|
- run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
|
||||||
|
- name: Checkout vllm-project/vllm repo
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
repository: vllm-project/vllm
|
||||||
|
path: ./vllm-empty
|
||||||
|
- name: Install vllm
|
||||||
|
working-directory: vllm-empty
|
||||||
|
run: |
|
||||||
|
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
|
VLLM_TARGET_DEVICE=empty pip install .
|
||||||
|
- name: Install vllm-ascend dev
|
||||||
|
run: |
|
||||||
|
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
|
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
|
||||||
|
env:
|
||||||
|
SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
|
||||||
|
with:
|
||||||
|
extra_args: --all-files --hook-stage manual
|
||||||
49
.github/workflows/shellcheck.yml
vendored
49
.github/workflows/shellcheck.yml
vendored
@@ -1,49 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright 2023 The vLLM team.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# Adapted from vllm-project/vllm/blob/main/.github
|
|
||||||
#
|
|
||||||
|
|
||||||
name: Lint shell scripts
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- 'main'
|
|
||||||
- '*-dev'
|
|
||||||
paths:
|
|
||||||
- '**/*.sh'
|
|
||||||
- '.github/workflows/shellcheck.yml'
|
|
||||||
|
|
||||||
env:
|
|
||||||
LC_ALL: en_US.UTF-8
|
|
||||||
|
|
||||||
defaults:
|
|
||||||
run:
|
|
||||||
shell: bash
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
shellcheck:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: "Checkout"
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: "Check shell scripts"
|
|
||||||
run: |
|
|
||||||
tools/shellcheck.sh
|
|
||||||
62
.github/workflows/vllm_ascend_test.yaml
vendored
62
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -69,67 +69,7 @@ jobs:
|
|||||||
lint:
|
lint:
|
||||||
# Only trigger lint on pull request
|
# Only trigger lint on pull request
|
||||||
if: ${{ github.event_name == 'pull_request' }}
|
if: ${{ github.event_name == 'pull_request' }}
|
||||||
runs-on: ubuntu-latest
|
uses: ./.github/workflows/pre-commit.yml
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-version: ["3.10"]
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install -r requirements-lint.txt
|
|
||||||
- name: Run codespell check
|
|
||||||
run: |
|
|
||||||
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
|
|
||||||
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever')
|
|
||||||
|
|
||||||
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
|
||||||
- name: Analysing the code with ruff
|
|
||||||
run: |
|
|
||||||
echo "::add-matcher::.github/workflows/matchers/ruff.json"
|
|
||||||
ruff check --output-format github .
|
|
||||||
- name: Run isort
|
|
||||||
run: |
|
|
||||||
isort . --check-only
|
|
||||||
- name: Running yapf
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install toml
|
|
||||||
pip install yapf==0.32.0
|
|
||||||
yapf --diff --recursive .
|
|
||||||
|
|
||||||
- name: Checkout vllm-project/vllm repo
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
repository: vllm-project/vllm
|
|
||||||
path: vllm-empty
|
|
||||||
|
|
||||||
- name: Actionlint Check
|
|
||||||
env:
|
|
||||||
SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
|
|
||||||
run: |
|
|
||||||
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
|
|
||||||
tools/actionlint.sh -color
|
|
||||||
|
|
||||||
- name: Install vllm-project/vllm from source
|
|
||||||
working-directory: vllm-empty
|
|
||||||
run: |
|
|
||||||
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
|
||||||
VLLM_TARGET_DEVICE=empty pip install .
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
|
||||||
|
|
||||||
- name: Mypy Check
|
|
||||||
run: |
|
|
||||||
echo "::add-matcher::.github/workflows/matchers/mypy.json"
|
|
||||||
tools/mypy.sh 1 ${{ matrix.python-version }}
|
|
||||||
|
|
||||||
ut:
|
ut:
|
||||||
needs: [lint]
|
needs: [lint]
|
||||||
|
|||||||
141
.pre-commit-config.yaml
Normal file
141
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
default_install_hook_types:
|
||||||
|
- pre-commit
|
||||||
|
- commit-msg
|
||||||
|
default_stages:
|
||||||
|
- pre-commit # Run locally
|
||||||
|
- manual # Run in CI
|
||||||
|
exclude: 'examples/.*' # Exclude examples from all hooks by default
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/codespell-project/codespell
|
||||||
|
rev: v2.4.1
|
||||||
|
hooks:
|
||||||
|
- id: codespell
|
||||||
|
args: [
|
||||||
|
--toml, pyproject.toml,
|
||||||
|
'--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
|
||||||
|
'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn'
|
||||||
|
]
|
||||||
|
additional_dependencies:
|
||||||
|
- tomli
|
||||||
|
- repo: https://github.com/google/yapf
|
||||||
|
rev: v0.43.0
|
||||||
|
hooks:
|
||||||
|
- id: yapf
|
||||||
|
args: [--in-place, --verbose]
|
||||||
|
# Keep the same list from yapfignore here to avoid yapf failing without any inputs
|
||||||
|
exclude: '(.github|benchmarks|examples|docs)/.*'
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.11.7
|
||||||
|
hooks:
|
||||||
|
- id: ruff
|
||||||
|
args: [--output-format, github, --fix]
|
||||||
|
- id: ruff-format
|
||||||
|
files: ^(benchmarks|examples)/.*
|
||||||
|
- repo: https://github.com/crate-ci/typos
|
||||||
|
rev: v1.32.0
|
||||||
|
hooks:
|
||||||
|
- id: typos
|
||||||
|
- repo: https://github.com/PyCQA/isort
|
||||||
|
rev: 6.0.1
|
||||||
|
hooks:
|
||||||
|
- id: isort
|
||||||
|
# - repo: https://github.com/pre-commit/mirrors-clang-format
|
||||||
|
# rev: v20.1.3
|
||||||
|
# hooks:
|
||||||
|
# - id: clang-format
|
||||||
|
# files: ^csrc/.*\.(cpp|hpp|cc|hh|cxx|hxx)$
|
||||||
|
# types_or: [c++]
|
||||||
|
# args: [--style=google, --verbose]
|
||||||
|
# - repo: https://github.com/jackdewinter/pymarkdown
|
||||||
|
# rev: v0.9.29
|
||||||
|
# hooks:
|
||||||
|
# - id: pymarkdown
|
||||||
|
# args: [fix]
|
||||||
|
- repo: https://github.com/rhysd/actionlint
|
||||||
|
rev: v1.7.7
|
||||||
|
hooks:
|
||||||
|
- id: actionlint
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
# For local development, you can run mypy using tools/mypy.sh script if needed.
|
||||||
|
# - id: mypy-local
|
||||||
|
# name: Run mypy for local Python installation
|
||||||
|
# entry: tools/mypy.sh 0 "local"
|
||||||
|
# language: system
|
||||||
|
# types: [python]
|
||||||
|
# stages: [pre-commit] # Don't run in CI
|
||||||
|
- id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
|
name: Run mypy for Python 3.9
|
||||||
|
entry: tools/mypy.sh 1 "3.9"
|
||||||
|
# Use system python because vllm installation is required
|
||||||
|
language: system
|
||||||
|
types: [python]
|
||||||
|
stages: [manual] # Only run in CI
|
||||||
|
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
|
name: Run mypy for Python 3.10
|
||||||
|
entry: tools/mypy.sh 1 "3.10"
|
||||||
|
# Use system python because vllm installation is required
|
||||||
|
language: system
|
||||||
|
types: [python]
|
||||||
|
stages: [manual] # Only run in CI
|
||||||
|
- id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
|
name: Run mypy for Python 3.11
|
||||||
|
entry: tools/mypy.sh 1 "3.11"
|
||||||
|
# Use system python because vllm installation is required
|
||||||
|
language: system
|
||||||
|
types: [python]
|
||||||
|
stages: [manual] # Only run in CI
|
||||||
|
- id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
|
name: Run mypy for Python 3.12
|
||||||
|
entry: tools/mypy.sh 1 "3.12"
|
||||||
|
# Use system python because vllm installation is required
|
||||||
|
language: system
|
||||||
|
types: [python]
|
||||||
|
stages: [manual] # Only run in CI
|
||||||
|
# FIXME: enable shellcheck
|
||||||
|
# - id: shellcheck
|
||||||
|
# name: Lint shell scripts
|
||||||
|
# entry: tools/shellcheck.sh
|
||||||
|
# language: script
|
||||||
|
# types: [shell]
|
||||||
|
- id: png-lint
|
||||||
|
name: Lint PNG exports from excalidraw
|
||||||
|
entry: tools/png-lint.sh
|
||||||
|
language: script
|
||||||
|
types: [png]
|
||||||
|
- id: signoff-commit
|
||||||
|
name: Sign-off Commit
|
||||||
|
entry: bash
|
||||||
|
args:
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
if ! grep -q "^Signed-off-by: $(git config user.name) <$(git config user.email)>" "$(git rev-parse --git-path COMMIT_EDITMSG)"; then
|
||||||
|
printf "\nSigned-off-by: $(git config user.name) <$(git config user.email)>\n" >> "$(git rev-parse --git-path COMMIT_EDITMSG)"
|
||||||
|
fi
|
||||||
|
language: system
|
||||||
|
verbose: true
|
||||||
|
stages: [commit-msg]
|
||||||
|
- id: check-filenames
|
||||||
|
name: Check for spaces in all filenames
|
||||||
|
entry: bash
|
||||||
|
args:
|
||||||
|
- -c
|
||||||
|
- 'git ls-files | grep " " && echo "Filenames should not contain spaces!" && exit 1 || exit 0'
|
||||||
|
language: system
|
||||||
|
always_run: true
|
||||||
|
pass_filenames: false
|
||||||
|
- id: enforce-import-regex-instead-of-re
|
||||||
|
name: Enforce import regex as re
|
||||||
|
entry: python tools/enforce_regex_import.py
|
||||||
|
language: python
|
||||||
|
types: [python]
|
||||||
|
pass_filenames: false
|
||||||
|
additional_dependencies: [regex]
|
||||||
|
# Keep `suggestion` last
|
||||||
|
- id: suggestion
|
||||||
|
name: Suggestion
|
||||||
|
entry: bash -c 'echo "To bypass pre-commit hooks, add --no-verify to git commit."'
|
||||||
|
language: system
|
||||||
|
verbose: true
|
||||||
|
pass_filenames: false
|
||||||
|
# Insert new entries above the `suggestion` entry
|
||||||
@@ -41,19 +41,26 @@ def benchmark_npu(fn, num_iterations=100, num_warmup_iterations=50):
|
|||||||
|
|
||||||
|
|
||||||
def get_masked_input_and_mask_ref(
|
def get_masked_input_and_mask_ref(
|
||||||
input_: torch.Tensor, org_vocab_start_index: int,
|
input_: torch.Tensor,
|
||||||
org_vocab_end_index: int, num_org_vocab_padding: int,
|
org_vocab_start_index: int,
|
||||||
added_vocab_start_index: int,
|
org_vocab_end_index: int,
|
||||||
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
|
num_org_vocab_padding: int,
|
||||||
|
added_vocab_start_index: int,
|
||||||
|
added_vocab_end_index: int,
|
||||||
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||||
"""Reference implementation for verification"""
|
"""Reference implementation for verification"""
|
||||||
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ <
|
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
|
||||||
org_vocab_end_index)
|
|
||||||
added_vocab_mask = (input_ >= added_vocab_start_index) & (
|
added_vocab_mask = (input_ >= added_vocab_start_index) & (
|
||||||
input_ < added_vocab_end_index)
|
input_ < added_vocab_end_index
|
||||||
added_offset = added_vocab_start_index - (
|
)
|
||||||
org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
|
added_offset = (
|
||||||
valid_offset = (org_vocab_start_index *
|
added_vocab_start_index
|
||||||
org_vocab_mask) + (added_offset * added_vocab_mask)
|
- (org_vocab_end_index - org_vocab_start_index)
|
||||||
|
- num_org_vocab_padding
|
||||||
|
)
|
||||||
|
valid_offset = (org_vocab_start_index * org_vocab_mask) + (
|
||||||
|
added_offset * added_vocab_mask
|
||||||
|
)
|
||||||
vocab_mask = org_vocab_mask | added_vocab_mask
|
vocab_mask = org_vocab_mask | added_vocab_mask
|
||||||
masked_input = vocab_mask * (input_ - valid_offset)
|
masked_input = vocab_mask * (input_ - valid_offset)
|
||||||
return masked_input, ~vocab_mask
|
return masked_input, ~vocab_mask
|
||||||
@@ -94,21 +101,25 @@ def test_get_masked_input_and_mask(
|
|||||||
|
|
||||||
# Define reference function
|
# Define reference function
|
||||||
def ref_fn():
|
def ref_fn():
|
||||||
return get_masked_input_and_mask_ref(input_tensor,
|
return get_masked_input_and_mask_ref(
|
||||||
test_case["org_start"],
|
input_tensor,
|
||||||
test_case["org_end"],
|
test_case["org_start"],
|
||||||
test_case["padding"],
|
test_case["org_end"],
|
||||||
test_case["added_start"],
|
test_case["padding"],
|
||||||
test_case["added_end"])
|
test_case["added_start"],
|
||||||
|
test_case["added_end"],
|
||||||
|
)
|
||||||
|
|
||||||
# Define custom function
|
# Define custom function
|
||||||
def custom_fn():
|
def custom_fn():
|
||||||
return torch.ops._C.get_masked_input_and_mask(input_tensor,
|
return torch.ops._C.get_masked_input_and_mask(
|
||||||
test_case["org_start"],
|
input_tensor,
|
||||||
test_case["org_end"],
|
test_case["org_start"],
|
||||||
test_case["padding"],
|
test_case["org_end"],
|
||||||
test_case["added_start"],
|
test_case["padding"],
|
||||||
test_case["added_end"])
|
test_case["added_start"],
|
||||||
|
test_case["added_end"],
|
||||||
|
)
|
||||||
|
|
||||||
# Get results for correctness testing
|
# Get results for correctness testing
|
||||||
ref_masked_input, ref_mask = ref_fn()
|
ref_masked_input, ref_mask = ref_fn()
|
||||||
@@ -120,9 +131,9 @@ def test_get_masked_input_and_mask(
|
|||||||
|
|
||||||
# Print performance results
|
# Print performance results
|
||||||
print("\nPerformance Results:")
|
print("\nPerformance Results:")
|
||||||
print(f"Reference implementation: {ref_time*1000:.3f} ms")
|
print(f"Reference implementation: {ref_time * 1000:.3f} ms")
|
||||||
print(f"Custom implementation: {custom_time*1000:.3f} ms")
|
print(f"Custom implementation: {custom_time * 1000:.3f} ms")
|
||||||
print(f"Speedup: {ref_time/custom_time:.2f}x")
|
print(f"Speedup: {ref_time / custom_time:.2f}x")
|
||||||
|
|
||||||
# Compare results for correctness
|
# Compare results for correctness
|
||||||
ref_masked_input = ref_masked_input.to(dtype)
|
ref_masked_input = ref_masked_input.to(dtype)
|
||||||
@@ -136,9 +147,12 @@ def test_get_masked_input_and_mask(
|
|||||||
ref_masked_input,
|
ref_masked_input,
|
||||||
rtol=1e-5,
|
rtol=1e-5,
|
||||||
atol=1e-5,
|
atol=1e-5,
|
||||||
msg=f"Masked input mismatch for case: {test_case}")
|
msg=f"Masked input mismatch for case: {test_case}",
|
||||||
torch.testing.assert_close(custom_mask,
|
)
|
||||||
ref_mask,
|
torch.testing.assert_close(
|
||||||
rtol=1e-5,
|
custom_mask,
|
||||||
atol=1e-5,
|
ref_mask,
|
||||||
msg=f"Mask mismatch for case: {test_case}")
|
rtol=1e-5,
|
||||||
|
atol=1e-5,
|
||||||
|
msg=f"Mask mismatch for case: {test_case}",
|
||||||
|
)
|
||||||
|
|||||||
@@ -49,36 +49,43 @@ def read_markdown(file):
|
|||||||
|
|
||||||
|
|
||||||
def results_to_json(latency, throughput, serving):
|
def results_to_json(latency, throughput, serving):
|
||||||
return json.dumps({
|
return json.dumps(
|
||||||
'latency': latency.to_dict(),
|
{
|
||||||
'throughput': throughput.to_dict(),
|
"latency": latency.to_dict(),
|
||||||
'serving': serving.to_dict()
|
"throughput": throughput.to_dict(),
|
||||||
})
|
"serving": serving.to_dict(),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Process the results of the benchmark tests.")
|
description="Process the results of the benchmark tests."
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--results_folder",
|
"--results_folder",
|
||||||
type=str,
|
type=str,
|
||||||
default="../results/",
|
default="../results/",
|
||||||
help="The folder where the benchmark results are stored.")
|
help="The folder where the benchmark results are stored.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output_folder",
|
"--output_folder",
|
||||||
type=str,
|
type=str,
|
||||||
default="../results/",
|
default="../results/",
|
||||||
help="The folder where the benchmark results are stored.")
|
help="The folder where the benchmark results are stored.",
|
||||||
parser.add_argument("--markdown_template",
|
)
|
||||||
type=str,
|
parser.add_argument(
|
||||||
default="./perf_result_template.md",
|
"--markdown_template",
|
||||||
help="The template file for the markdown report.")
|
type=str,
|
||||||
parser.add_argument("--tag",
|
default="./perf_result_template.md",
|
||||||
default="main",
|
help="The template file for the markdown report.",
|
||||||
help="Tag to be used for release message.")
|
)
|
||||||
parser.add_argument("--commit_id",
|
parser.add_argument(
|
||||||
default="",
|
"--tag", default="main", help="Tag to be used for release message."
|
||||||
help="Commit ID to be used for release message.")
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--commit_id", default="", help="Commit ID to be used for release message."
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
results_folder = (CUR_PATH / args.results_folder).resolve()
|
results_folder = (CUR_PATH / args.results_folder).resolve()
|
||||||
@@ -87,7 +94,6 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# collect results
|
# collect results
|
||||||
for test_file in results_folder.glob("*.json"):
|
for test_file in results_folder.glob("*.json"):
|
||||||
|
|
||||||
with open(test_file) as f:
|
with open(test_file) as f:
|
||||||
raw_result = json.loads(f.read())
|
raw_result = json.loads(f.read())
|
||||||
|
|
||||||
@@ -111,7 +117,8 @@ if __name__ == "__main__":
|
|||||||
for perc in [10, 25, 50, 75, 90, 99]:
|
for perc in [10, 25, 50, 75, 90, 99]:
|
||||||
# Multiply 1000 to convert the time unit from s to ms
|
# Multiply 1000 to convert the time unit from s to ms
|
||||||
raw_result.update(
|
raw_result.update(
|
||||||
{f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
|
{f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
|
||||||
|
)
|
||||||
raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
|
raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
|
||||||
|
|
||||||
# add the result to raw_result
|
# add the result to raw_result
|
||||||
@@ -129,55 +136,53 @@ if __name__ == "__main__":
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"Skipping {test_file}")
|
print(f"Skipping {test_file}")
|
||||||
serving_results.sort(key=lambda x: (len(x['test_name']), x['test_name']))
|
serving_results.sort(key=lambda x: (len(x["test_name"]), x["test_name"]))
|
||||||
|
|
||||||
latency_results = pd.DataFrame.from_dict(latency_results)
|
latency_results = pd.DataFrame.from_dict(latency_results)
|
||||||
serving_results = pd.DataFrame.from_dict(serving_results)
|
serving_results = pd.DataFrame.from_dict(serving_results)
|
||||||
throughput_results = pd.DataFrame.from_dict(throughput_results)
|
throughput_results = pd.DataFrame.from_dict(throughput_results)
|
||||||
|
|
||||||
raw_results_json = results_to_json(latency_results, throughput_results,
|
raw_results_json = results_to_json(
|
||||||
serving_results)
|
latency_results, throughput_results, serving_results
|
||||||
|
)
|
||||||
|
|
||||||
# remapping the key, for visualization purpose
|
# remapping the key, for visualization purpose
|
||||||
if not latency_results.empty:
|
if not latency_results.empty:
|
||||||
latency_results = latency_results[list(
|
latency_results = latency_results[list(latency_column_mapping.keys())].rename(
|
||||||
latency_column_mapping.keys())].rename(
|
columns=latency_column_mapping
|
||||||
columns=latency_column_mapping)
|
)
|
||||||
if not serving_results.empty:
|
if not serving_results.empty:
|
||||||
serving_results = serving_results[list(
|
serving_results = serving_results[list(serving_column_mapping.keys())].rename(
|
||||||
serving_column_mapping.keys())].rename(
|
columns=serving_column_mapping
|
||||||
columns=serving_column_mapping)
|
)
|
||||||
if not throughput_results.empty:
|
if not throughput_results.empty:
|
||||||
throughput_results = throughput_results[list(
|
throughput_results = throughput_results[
|
||||||
throughput_results_column_mapping.keys())].rename(
|
list(throughput_results_column_mapping.keys())
|
||||||
columns=throughput_results_column_mapping)
|
].rename(columns=throughput_results_column_mapping)
|
||||||
|
|
||||||
processed_results_json = results_to_json(latency_results,
|
processed_results_json = results_to_json(
|
||||||
throughput_results,
|
latency_results, throughput_results, serving_results
|
||||||
serving_results)
|
)
|
||||||
|
|
||||||
# get markdown tables
|
# get markdown tables
|
||||||
latency_md_table = tabulate(latency_results,
|
latency_md_table = tabulate(
|
||||||
headers='keys',
|
latency_results, headers="keys", tablefmt="pipe", showindex=False
|
||||||
tablefmt='pipe',
|
)
|
||||||
showindex=False)
|
serving_md_table = tabulate(
|
||||||
serving_md_table = tabulate(serving_results,
|
serving_results, headers="keys", tablefmt="pipe", showindex=False
|
||||||
headers='keys',
|
)
|
||||||
tablefmt='pipe',
|
throughput_md_table = tabulate(
|
||||||
showindex=False)
|
throughput_results, headers="keys", tablefmt="pipe", showindex=False
|
||||||
throughput_md_table = tabulate(throughput_results,
|
)
|
||||||
headers='keys',
|
|
||||||
tablefmt='pipe',
|
|
||||||
showindex=False)
|
|
||||||
|
|
||||||
# document the result
|
# document the result
|
||||||
print(output_folder)
|
print(output_folder)
|
||||||
with open(output_folder / "benchmark_results.md", "w") as f:
|
with open(output_folder / "benchmark_results.md", "w") as f:
|
||||||
|
|
||||||
results = read_markdown(markdown_template)
|
results = read_markdown(markdown_template)
|
||||||
results = results.format(
|
results = results.format(
|
||||||
latency_tests_markdown_table=latency_md_table,
|
latency_tests_markdown_table=latency_md_table,
|
||||||
throughput_tests_markdown_table=throughput_md_table,
|
throughput_tests_markdown_table=throughput_md_table,
|
||||||
serving_tests_markdown_table=serving_md_table,
|
serving_tests_markdown_table=serving_md_table,
|
||||||
benchmarking_results_in_json_string=processed_results_json)
|
benchmarking_results_in_json_string=processed_results_json,
|
||||||
|
)
|
||||||
f.write(results)
|
f.write(results)
|
||||||
|
|||||||
@@ -7,9 +7,8 @@ import libcst.matchers as m
|
|||||||
# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
|
# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
|
||||||
|
|
||||||
|
|
||||||
# TDOO(Potabk): Remove this patch when the issue is fixed in the upstream
|
# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
|
||||||
class StreamingFalseTransformer(cst.CSTTransformer):
|
class StreamingFalseTransformer(cst.CSTTransformer):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.in_target_class = False
|
self.in_target_class = False
|
||||||
self.in_target_func = False
|
self.in_target_func = False
|
||||||
@@ -63,15 +62,18 @@ def patch_file(path):
|
|||||||
print(f"Patched: {abs_path}")
|
print(f"Patched: {abs_path}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
parser = ArgumentParser(
|
parser = ArgumentParser(
|
||||||
description=
|
description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
|
||||||
"Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--path", type=str, help="Path to the benchmark_dataset.py file"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--path",
|
"--path",
|
||||||
type=str,
|
type=str,
|
||||||
default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
|
default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
|
||||||
help="Path to the benchmark_dataset.py file")
|
help="Path to the benchmark_dataset.py file",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
patch_file(args.path)
|
patch_file(args.path)
|
||||||
|
|||||||
@@ -44,82 +44,72 @@ BATCH_SIZE = {"ceval-valid": 1, "mmlu": 1, "gsm8k": "auto", "mmmu_val": 1}
|
|||||||
MODEL_TYPE = {
|
MODEL_TYPE = {
|
||||||
"Qwen/Qwen3-8B-Base": "vllm",
|
"Qwen/Qwen3-8B-Base": "vllm",
|
||||||
"Qwen/Qwen3-30B-A3B": "vllm",
|
"Qwen/Qwen3-30B-A3B": "vllm",
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct": "vllm-vlm"
|
"Qwen/Qwen2.5-VL-7B-Instruct": "vllm-vlm",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Command templates for running evaluations
|
# Command templates for running evaluations
|
||||||
MODEL_RUN_INFO = {
|
MODEL_RUN_INFO = {
|
||||||
"Qwen/Qwen3-30B-A3B":
|
"Qwen/Qwen3-30B-A3B": (
|
||||||
("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n"
|
"export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n"
|
||||||
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
|
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
|
||||||
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
||||||
),
|
),
|
||||||
"Qwen/Qwen3-8B-Base":
|
"Qwen/Qwen3-8B-Base": (
|
||||||
("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
"export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
||||||
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
|
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
|
||||||
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
||||||
),
|
),
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct":
|
"Qwen/Qwen2.5-VL-7B-Instruct": (
|
||||||
("export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
|
"export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
|
||||||
"lm_eval --model vllm-vlm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
|
"lm_eval --model vllm-vlm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
|
||||||
"--apply_chat_template --fewshot_as_multiturn --batch_size 1"),
|
"--apply_chat_template --fewshot_as_multiturn --batch_size 1"
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Evaluation metric filters per task
|
# Evaluation metric filters per task
|
||||||
FILTER = {
|
FILTER = {
|
||||||
"gsm8k": "exact_match,flexible-extract",
|
"gsm8k": "exact_match,flexible-extract",
|
||||||
"ceval-valid": "acc,none",
|
"ceval-valid": "acc,none",
|
||||||
"mmmu_val": "acc,none"
|
"mmmu_val": "acc,none",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Expected accuracy values for models
|
# Expected accuracy values for models
|
||||||
EXPECTED_VALUE = {
|
EXPECTED_VALUE = {
|
||||||
"Qwen/Qwen3-30B-A3B": {
|
"Qwen/Qwen3-30B-A3B": {"ceval-valid": 0.83, "gsm8k": 0.85},
|
||||||
"ceval-valid": 0.83,
|
"Qwen/Qwen3-8B-Base": {"ceval-valid": 0.82, "gsm8k": 0.83},
|
||||||
"gsm8k": 0.85
|
"Qwen/Qwen2.5-VL-7B-Instruct": {"mmmu_val": 0.51},
|
||||||
},
|
|
||||||
"Qwen/Qwen3-8B-Base": {
|
|
||||||
"ceval-valid": 0.82,
|
|
||||||
"gsm8k": 0.83
|
|
||||||
},
|
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct": {
|
|
||||||
"mmmu_val": 0.51
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
PARALLEL_MODE = {
|
PARALLEL_MODE = {
|
||||||
"Qwen/Qwen3-8B-Base": "TP",
|
"Qwen/Qwen3-8B-Base": "TP",
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct": "TP",
|
"Qwen/Qwen2.5-VL-7B-Instruct": "TP",
|
||||||
"Qwen/Qwen3-30B-A3B": "EP"
|
"Qwen/Qwen3-30B-A3B": "EP",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Execution backend configuration
|
# Execution backend configuration
|
||||||
EXECUTION_MODE = {
|
EXECUTION_MODE = {
|
||||||
"Qwen/Qwen3-8B-Base": "ACLGraph",
|
"Qwen/Qwen3-8B-Base": "ACLGraph",
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct": "ACLGraph",
|
"Qwen/Qwen2.5-VL-7B-Instruct": "ACLGraph",
|
||||||
"Qwen/Qwen3-30B-A3B": "ACLGraph"
|
"Qwen/Qwen3-30B-A3B": "ACLGraph",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Model arguments for evaluation
|
# Model arguments for evaluation
|
||||||
MODEL_ARGS = {
|
MODEL_ARGS = {
|
||||||
"Qwen/Qwen3-8B-Base":
|
"Qwen/Qwen3-8B-Base": "pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6",
|
||||||
"pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6",
|
"Qwen/Qwen2.5-VL-7B-Instruct": "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2",
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct":
|
"Qwen/Qwen3-30B-A3B": "pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True",
|
||||||
"pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2",
|
|
||||||
"Qwen/Qwen3-30B-A3B":
|
|
||||||
"pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Whether to apply chat template formatting
|
# Whether to apply chat template formatting
|
||||||
APPLY_CHAT_TEMPLATE = {
|
APPLY_CHAT_TEMPLATE = {
|
||||||
"Qwen/Qwen3-8B-Base": True,
|
"Qwen/Qwen3-8B-Base": True,
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct": True,
|
"Qwen/Qwen2.5-VL-7B-Instruct": True,
|
||||||
"Qwen/Qwen3-30B-A3B": False
|
"Qwen/Qwen3-30B-A3B": False,
|
||||||
}
|
}
|
||||||
# Few-shot examples handling as multi-turn dialogues.
|
# Few-shot examples handling as multi-turn dialogues.
|
||||||
FEWSHOT_AS_MULTITURN = {
|
FEWSHOT_AS_MULTITURN = {
|
||||||
"Qwen/Qwen3-8B-Base": True,
|
"Qwen/Qwen3-8B-Base": True,
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct": True,
|
"Qwen/Qwen2.5-VL-7B-Instruct": True,
|
||||||
"Qwen/Qwen3-30B-A3B": False
|
"Qwen/Qwen3-30B-A3B": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Relative tolerance for accuracy checks
|
# Relative tolerance for accuracy checks
|
||||||
@@ -136,7 +126,7 @@ def run_accuracy_test(queue, model, dataset):
|
|||||||
"tasks": dataset,
|
"tasks": dataset,
|
||||||
"apply_chat_template": APPLY_CHAT_TEMPLATE[model],
|
"apply_chat_template": APPLY_CHAT_TEMPLATE[model],
|
||||||
"fewshot_as_multiturn": FEWSHOT_AS_MULTITURN[model],
|
"fewshot_as_multiturn": FEWSHOT_AS_MULTITURN[model],
|
||||||
"batch_size": BATCH_SIZE[dataset]
|
"batch_size": BATCH_SIZE[dataset],
|
||||||
}
|
}
|
||||||
|
|
||||||
if MODEL_TYPE[model] == "vllm":
|
if MODEL_TYPE[model] == "vllm":
|
||||||
@@ -151,7 +141,7 @@ def run_accuracy_test(queue, model, dataset):
|
|||||||
queue.put(e)
|
queue.put(e)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
finally:
|
finally:
|
||||||
if 'results' in locals():
|
if "results" in locals():
|
||||||
del results
|
del results
|
||||||
gc.collect()
|
gc.collect()
|
||||||
torch.npu.empty_cache()
|
torch.npu.empty_cache()
|
||||||
@@ -161,16 +151,15 @@ def run_accuracy_test(queue, model, dataset):
|
|||||||
def generate_md(model_name, tasks_list, args, datasets):
|
def generate_md(model_name, tasks_list, args, datasets):
|
||||||
"""Generate Markdown report with evaluation results"""
|
"""Generate Markdown report with evaluation results"""
|
||||||
# Format the run command
|
# Format the run command
|
||||||
run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name,
|
run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name, datasets=datasets)
|
||||||
datasets=datasets)
|
|
||||||
model = model_name.split("/")[1]
|
model = model_name.split("/")[1]
|
||||||
|
|
||||||
# Version information section
|
# Version information section
|
||||||
version_info = (
|
version_info = (
|
||||||
f"**vLLM Version**: vLLM: {args.vllm_version} "
|
f"**vLLM Version**: vLLM: {args.vllm_version} "
|
||||||
f"([{args.vllm_commit}]({VLLM_URL+args.vllm_commit})), "
|
f"([{args.vllm_commit}]({VLLM_URL + args.vllm_commit})), "
|
||||||
f"vLLM Ascend: {args.vllm_ascend_version} "
|
f"vLLM Ascend: {args.vllm_ascend_version} "
|
||||||
f"([{args.vllm_ascend_commit}]({VLLM_ASCEND_URL+args.vllm_ascend_commit})) "
|
f"([{args.vllm_ascend_commit}]({VLLM_ASCEND_URL + args.vllm_ascend_commit})) "
|
||||||
)
|
)
|
||||||
|
|
||||||
# Report header with system info
|
# Report header with system info
|
||||||
@@ -218,21 +207,39 @@ def generate_md(model_name, tasks_list, args, datasets):
|
|||||||
else:
|
else:
|
||||||
n_shot = "0"
|
n_shot = "0"
|
||||||
flag = ACCURACY_FLAG.get(task_name, "")
|
flag = ACCURACY_FLAG.get(task_name, "")
|
||||||
row = (f"| {task_name:<37} "
|
row = (
|
||||||
f"| {flt:<6} "
|
f"| {task_name:<37} "
|
||||||
f"| {n_shot:6} "
|
f"| {flt:<6} "
|
||||||
f"| {metric:<6} "
|
f"| {n_shot:6} "
|
||||||
f"| {flag}{value:>5.4f} "
|
f"| {metric:<6} "
|
||||||
f"| ± {stderr:>5.4f} |")
|
f"| {flag}{value:>5.4f} "
|
||||||
|
f"| ± {stderr:>5.4f} |"
|
||||||
|
)
|
||||||
if not task_name.startswith("-"):
|
if not task_name.startswith("-"):
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
rows_sub.append("<details>" + "\n" + "<summary>" + task_name +
|
rows_sub.append(
|
||||||
" details" + "</summary>" + "\n" * 2 + header)
|
"<details>"
|
||||||
|
+ "\n"
|
||||||
|
+ "<summary>"
|
||||||
|
+ task_name
|
||||||
|
+ " details"
|
||||||
|
+ "</summary>"
|
||||||
|
+ "\n" * 2
|
||||||
|
+ header
|
||||||
|
)
|
||||||
rows_sub.append(row)
|
rows_sub.append(row)
|
||||||
rows_sub.append("</details>")
|
rows_sub.append("</details>")
|
||||||
# Combine all Markdown sections
|
# Combine all Markdown sections
|
||||||
md = preamble + "\n" + header + "\n" + "\n".join(rows) + "\n" + "\n".join(
|
md = (
|
||||||
rows_sub) + "\n"
|
preamble
|
||||||
|
+ "\n"
|
||||||
|
+ header
|
||||||
|
+ "\n"
|
||||||
|
+ "\n".join(rows)
|
||||||
|
+ "\n"
|
||||||
|
+ "\n".join(rows_sub)
|
||||||
|
+ "\n"
|
||||||
|
)
|
||||||
print(md)
|
print(md)
|
||||||
return md
|
return md
|
||||||
|
|
||||||
@@ -262,8 +269,9 @@ def main(args):
|
|||||||
# Evaluate model on each dataset
|
# Evaluate model on each dataset
|
||||||
for dataset in datasets:
|
for dataset in datasets:
|
||||||
accuracy_expected = EXPECTED_VALUE[args.model][dataset]
|
accuracy_expected = EXPECTED_VALUE[args.model][dataset]
|
||||||
p = multiprocessing.Process(target=run_accuracy_test,
|
p = multiprocessing.Process(
|
||||||
args=(result_queue, args.model, dataset))
|
target=run_accuracy_test, args=(result_queue, args.model, dataset)
|
||||||
|
)
|
||||||
p.start()
|
p.start()
|
||||||
p.join()
|
p.join()
|
||||||
if p.is_alive():
|
if p.is_alive():
|
||||||
@@ -274,8 +282,11 @@ def main(args):
|
|||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
result = result_queue.get()
|
result = result_queue.get()
|
||||||
print(result)
|
print(result)
|
||||||
if accuracy_expected - RTOL < result[dataset][
|
if (
|
||||||
FILTER[dataset]] < accuracy_expected + RTOL:
|
accuracy_expected - RTOL
|
||||||
|
< result[dataset][FILTER[dataset]]
|
||||||
|
< accuracy_expected + RTOL
|
||||||
|
):
|
||||||
ACCURACY_FLAG[dataset] = "✅"
|
ACCURACY_FLAG[dataset] = "✅"
|
||||||
else:
|
else:
|
||||||
ACCURACY_FLAG[dataset] = "❌"
|
ACCURACY_FLAG[dataset] = "❌"
|
||||||
@@ -285,10 +296,11 @@ def main(args):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
multiprocessing.set_start_method('spawn', force=True)
|
multiprocessing.set_start_method("spawn", force=True)
|
||||||
# Initialize argument parser
|
# Initialize argument parser
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Run model accuracy evaluation and generate report")
|
description="Run model accuracy evaluation and generate report"
|
||||||
|
)
|
||||||
parser.add_argument("--output", type=str, required=True)
|
parser.add_argument("--output", type=str, required=True)
|
||||||
parser.add_argument("--model", type=str, required=True)
|
parser.add_argument("--model", type=str, required=True)
|
||||||
parser.add_argument("--vllm_ascend_version", type=str, required=False)
|
parser.add_argument("--vllm_ascend_version", type=str, required=False)
|
||||||
|
|||||||
@@ -12,38 +12,55 @@ Theoretically, the vllm-ascend build is only supported on Linux because
|
|||||||
But you can still set up dev env on Linux/Windows/macOS for linting and basic
|
But you can still set up dev env on Linux/Windows/macOS for linting and basic
|
||||||
test as following commands:
|
test as following commands:
|
||||||
|
|
||||||
|
#### Run lint locally
|
||||||
```bash
|
```bash
|
||||||
# Choose a base dir (~/vllm-project/) and set up venv
|
# Choose a base dir (~/vllm-project/) and set up venv
|
||||||
cd ~/vllm-project/
|
cd ~/vllm-project/
|
||||||
python3 -m venv .venv
|
python3 -m venv .venv
|
||||||
source ./.venv/bin/activate
|
source ./.venv/bin/activate
|
||||||
|
|
||||||
# Clone vllm code and install
|
# Clone vllm-ascend and install
|
||||||
git clone https://github.com/vllm-project/vllm.git
|
git clone https://github.com/vllm-project/vllm-ascend.git
|
||||||
|
cd vllm-ascend
|
||||||
|
|
||||||
|
# Install lint requirement and enable pre-commit hook
|
||||||
|
pip install -r requirements-lint.txt
|
||||||
|
|
||||||
|
# Run lint (the first run downloads the pre-commit hook dependencies, so network access, via a proxy if necessary, is required)
|
||||||
|
bash format.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Run CI locally
|
||||||
|
|
||||||
|
After completing the "Run lint" setup, you can run CI locally:
|
||||||
|
|
||||||
|
```{code-block} bash
|
||||||
|
:substitutions:
|
||||||
|
|
||||||
|
cd ~/vllm-project/
|
||||||
|
|
||||||
|
# Running CI requires vLLM to be installed
|
||||||
|
git clone --branch |vllm_version| https://github.com/vllm-project/vllm.git
|
||||||
cd vllm
|
cd vllm
|
||||||
pip install -r requirements/build.txt
|
pip install -r requirements/build.txt
|
||||||
VLLM_TARGET_DEVICE="empty" pip install .
|
VLLM_TARGET_DEVICE="empty" pip install .
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
# Clone vllm-ascend and install
|
# Install requirements
|
||||||
git clone https://github.com/vllm-project/vllm-ascend.git
|
|
||||||
cd vllm-ascend
|
cd vllm-ascend
|
||||||
# install system requirement
|
# For Linux:
|
||||||
apt install -y gcc g++ cmake libnuma-dev
|
|
||||||
# install project requirement
|
|
||||||
pip install -r requirements-dev.txt
|
pip install -r requirements-dev.txt
|
||||||
|
# For non Linux:
|
||||||
|
cat requirements-dev.txt | grep -Ev '^#|^--|^$|^-r' | while read PACKAGE; do pip install "$PACKAGE"; done
|
||||||
|
cat requirements.txt | grep -Ev '^#|^--|^$|^-r' | while read PACKAGE; do pip install "$PACKAGE"; done
|
||||||
|
|
||||||
# Then you can run lint and mypy test
|
# Run ci:
|
||||||
bash format.sh
|
bash format.sh ci
|
||||||
|
```
|
||||||
|
|
||||||
# Build:
|
#### Submit the commit
|
||||||
# - only supported on Linux (torch_npu available)
|
|
||||||
# pip install -e .
|
|
||||||
# - build without deps for debugging in other OS
|
|
||||||
# pip install -e . --no-deps
|
|
||||||
# - build without custom ops
|
|
||||||
# COMPILE_CUSTOM_KERNELS=0 pip install -e .
|
|
||||||
|
|
||||||
|
```bash
|
||||||
# Commit changed files using `-s`
|
# Commit changed files using `-s`
|
||||||
git commit -sm "your commit info"
|
git commit -sm "your commit info"
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# Additional Configuration
|
# Additional Configuration
|
||||||
|
|
||||||
addintional configuration is a mechanism provided by vLLM to allow plugins to control inner behavior by their own. vLLM Ascend uses this mechanism to make the project more flexible.
|
Additional configuration is a mechanism provided by vLLM that allows plugins to control their internal behavior on their own. vLLM Ascend uses this mechanism to make the project more flexible.
|
||||||
|
|
||||||
## How to use
|
## How to use
|
||||||
|
|
||||||
|
|||||||
327
format.sh
327
format.sh
@@ -19,325 +19,26 @@
|
|||||||
# Adapted from https://github.com/vllm-project/vllm/tree/main/tools
|
# Adapted from https://github.com/vllm-project/vllm/tree/main/tools
|
||||||
#
|
#
|
||||||
|
|
||||||
# YAPF formatter, adapted from ray and skypilot.
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# # Do work and commit your work.
|
|
||||||
|
|
||||||
# # Format files that differ from origin/main.
|
|
||||||
# bash format.sh
|
|
||||||
|
|
||||||
# # Commit changed files with message 'Run yapf and ruff'
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# YAPF + Clang formatter (if installed). This script formats all changed files from the last mergebase.
|
|
||||||
# You are encouraged to run this locally before pushing changes for review.
|
|
||||||
|
|
||||||
# Cause the script to exit if a single command fails
|
|
||||||
set -eo pipefail
|
|
||||||
|
|
||||||
# this stops git rev-parse from failing if we run this from the .git directory
|
|
||||||
builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
|
|
||||||
ROOT="$(git rev-parse --show-toplevel)"
|
|
||||||
builtin cd "$ROOT" || exit 1
|
|
||||||
|
|
||||||
check_command() {
|
check_command() {
|
||||||
if ! command -v "$1" &> /dev/null; then
|
if ! command -v "$1" &> /dev/null; then
|
||||||
echo "❓❓$1 is not installed, please run \`pip install -r requirements-lint.txt\`"
|
echo "❓❓$1 is not installed, please run:"
|
||||||
|
echo "# Install lint deps"
|
||||||
|
echo "pip install -r requirements-lint.txt"
|
||||||
|
echo "# (optional) Enable git commit pre check"
|
||||||
|
echo "pre-commit install"
|
||||||
|
echo ""
|
||||||
|
echo "See step by step contribution guide:"
|
||||||
|
echo "https://vllm-ascend.readthedocs.io/en/latest/developer_guide/contribution"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
check_command yapf
|
check_command pre-commit
|
||||||
check_command ruff
|
|
||||||
check_command mypy
|
|
||||||
check_command codespell
|
|
||||||
check_command isort
|
|
||||||
check_command clang-format
|
|
||||||
|
|
||||||
YAPF_VERSION=$(yapf --version | awk '{print $2}')
|
# TODO: cleanup SC exclude
|
||||||
RUFF_VERSION=$(ruff --version | awk '{print $2}')
|
export SHELLCHECK_OPTS="--exclude=SC2046,SC2006,SC2086"
|
||||||
MYPY_VERSION=$(mypy --version | awk '{print $2}')
|
if [[ "$1" != 'ci' ]]; then
|
||||||
CODESPELL_VERSION=$(codespell --version)
|
pre-commit run --all-files
|
||||||
ISORT_VERSION=$(isort --vn)
|
|
||||||
CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}')
|
|
||||||
SPHINX_LINT_VERSION=$(sphinx-lint --version | awk '{print $2}')
|
|
||||||
|
|
||||||
# params: tool name, tool version, required version
|
|
||||||
tool_version_check() {
|
|
||||||
expected=$(grep "$1" requirements-lint.txt | cut -d'=' -f3)
|
|
||||||
if [[ "$2" != "$expected" ]]; then
|
|
||||||
echo "❓❓Wrong $1 version installed: $expected is required, not $2."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
tool_version_check "yapf" "$YAPF_VERSION"
|
|
||||||
tool_version_check "ruff" "$RUFF_VERSION"
|
|
||||||
tool_version_check "mypy" "$MYPY_VERSION"
|
|
||||||
tool_version_check "isort" "$ISORT_VERSION"
|
|
||||||
tool_version_check "codespell" "$CODESPELL_VERSION"
|
|
||||||
tool_version_check "clang-format" "$CLANGFORMAT_VERSION"
|
|
||||||
tool_version_check "sphinx-lint" "$SPHINX_LINT_VERSION"
|
|
||||||
|
|
||||||
YAPF_FLAGS=(
|
|
||||||
'--recursive'
|
|
||||||
'--parallel'
|
|
||||||
)
|
|
||||||
|
|
||||||
YAPF_EXCLUDES=(
|
|
||||||
'--exclude' 'build/**'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Format specified files
|
|
||||||
format() {
|
|
||||||
yapf --in-place "${YAPF_FLAGS[@]}" "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Format files that differ from main branch. Ignores dirs that are not slated
|
|
||||||
# for autoformat yet.
|
|
||||||
format_changed() {
|
|
||||||
# The `if` guard ensures that the list of filenames is not empty, which
|
|
||||||
# could cause yapf to receive 0 positional arguments, making it hang
|
|
||||||
# waiting for STDIN.
|
|
||||||
#
|
|
||||||
# `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that
|
|
||||||
# exist on both branches.
|
|
||||||
MERGEBASE="$(git merge-base origin/main HEAD)"
|
|
||||||
|
|
||||||
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
|
|
||||||
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs -P 5 \
|
|
||||||
yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Format all files
|
|
||||||
format_all() {
|
|
||||||
yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" .
|
|
||||||
}
|
|
||||||
|
|
||||||
echo 'vllm-ascend yapf:'
|
|
||||||
## This flag formats individual files. --files *must* be the first command line
|
|
||||||
## arg to use this option.
|
|
||||||
if [[ "$1" == '--files' ]]; then
|
|
||||||
format "${@:2}"
|
|
||||||
# If `--all` is passed, then any further arguments are ignored and the
|
|
||||||
# entire python directory is formatted.
|
|
||||||
elif [[ "$1" == '--all' ]]; then
|
|
||||||
format_all
|
|
||||||
else
|
else
|
||||||
# Format only the files that changed in last commit.
|
pre-commit run --all-files --hook-stage manual
|
||||||
format_changed
|
|
||||||
fi
|
fi
|
||||||
echo 'vllm-ascend yapf: Done'
|
|
||||||
|
|
||||||
# Run mypy
|
|
||||||
echo 'vllm-ascend mypy:'
|
|
||||||
tools/mypy.sh
|
|
||||||
echo 'vllm-ascend mypy: Done'
|
|
||||||
|
|
||||||
|
|
||||||
# If git diff returns a file that is in the skip list, the file may be checked anyway:
|
|
||||||
# https://github.com/codespell-project/codespell/issues/1915
|
|
||||||
# Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem
|
|
||||||
CODESPELL_EXCLUDES=(
|
|
||||||
'--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**'
|
|
||||||
)
|
|
||||||
|
|
||||||
CODESPELL_IGNORE_WORDS=(
|
|
||||||
'-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever'
|
|
||||||
)
|
|
||||||
|
|
||||||
# check spelling of specified files
|
|
||||||
spell_check() {
|
|
||||||
codespell "$@" "${CODESPELL_IGNORE_WORDS[@]}"
|
|
||||||
}
|
|
||||||
|
|
||||||
spell_check_all() {
|
|
||||||
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Spelling check of files that differ from main branch.
|
|
||||||
spell_check_changed() {
|
|
||||||
# The `if` guard ensures that the list of filenames is not empty, which
|
|
||||||
# could cause ruff to receive 0 positional arguments, making it hang
|
|
||||||
# waiting for STDIN.
|
|
||||||
#
|
|
||||||
# `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
|
|
||||||
# exist on both branches.
|
|
||||||
MERGEBASE="$(git merge-base origin/main HEAD)"
|
|
||||||
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
|
|
||||||
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
|
|
||||||
codespell "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
|
||||||
codespell "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
echo 'vllm-ascend codespell:'
|
|
||||||
# Run Codespell
|
|
||||||
## This flag runs spell check of individual files. --files *must* be the first command line
|
|
||||||
## arg to use this option.
|
|
||||||
if [[ "$1" == '--files' ]]; then
|
|
||||||
spell_check "${@:2}"
|
|
||||||
# If `--all` is passed, then any further arguments are ignored and the
|
|
||||||
# entire python directory is linted.
|
|
||||||
elif [[ "$1" == '--all' ]]; then
|
|
||||||
spell_check_all
|
|
||||||
else
|
|
||||||
# Check spelling only of the files that changed in last commit.
|
|
||||||
spell_check_changed
|
|
||||||
fi
|
|
||||||
echo 'vllm-ascend codespell: Done'
|
|
||||||
|
|
||||||
|
|
||||||
# Lint specified files
|
|
||||||
lint() {
|
|
||||||
ruff check "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Lint files that differ from main branch. Ignores dirs that are not slated
|
|
||||||
# for autolint yet.
|
|
||||||
lint_changed() {
|
|
||||||
# The `if` guard ensures that the list of filenames is not empty, which
|
|
||||||
# could cause ruff to receive 0 positional arguments, making it hang
|
|
||||||
# waiting for STDIN.
|
|
||||||
#
|
|
||||||
# `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
|
|
||||||
# exist on both branches.
|
|
||||||
MERGEBASE="$(git merge-base origin/main HEAD)"
|
|
||||||
|
|
||||||
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
|
|
||||||
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
|
|
||||||
ruff check
|
|
||||||
fi
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
echo 'vllm-ascend ruff:'
|
|
||||||
# Run Ruff
|
|
||||||
### This flag lints individual files. --files *must* be the first command line
|
|
||||||
### arg to use this option.
|
|
||||||
if [[ "$1" == '--files' ]]; then
|
|
||||||
lint "${@:2}"
|
|
||||||
# If `--all` is passed, then any further arguments are ignored and the
|
|
||||||
# entire python directory is linted.
|
|
||||||
elif [[ "$1" == '--all' ]]; then
|
|
||||||
lint vllm tests
|
|
||||||
else
|
|
||||||
# Format only the files that changed in last commit.
|
|
||||||
lint_changed
|
|
||||||
fi
|
|
||||||
echo 'vllm-ascend ruff: Done'
|
|
||||||
|
|
||||||
# check spelling of specified files
|
|
||||||
isort_check() {
|
|
||||||
isort "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
isort_check_all(){
|
|
||||||
isort .
|
|
||||||
}
|
|
||||||
|
|
||||||
# Spelling check of files that differ from main branch.
|
|
||||||
isort_check_changed() {
|
|
||||||
# The `if` guard ensures that the list of filenames is not empty, which
|
|
||||||
# could cause ruff to receive 0 positional arguments, making it hang
|
|
||||||
# waiting for STDIN.
|
|
||||||
#
|
|
||||||
# `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
|
|
||||||
# exist on both branches.
|
|
||||||
MERGEBASE="$(git merge-base origin/main HEAD)"
|
|
||||||
|
|
||||||
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
|
|
||||||
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
|
|
||||||
isort
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
echo 'vllm-ascend isort:'
|
|
||||||
# Run Isort
|
|
||||||
# This flag runs spell check of individual files. --files *must* be the first command line
|
|
||||||
# arg to use this option.
|
|
||||||
if [[ "$1" == '--files' ]]; then
|
|
||||||
isort_check "${@:2}"
|
|
||||||
# If `--all` is passed, then any further arguments are ignored and the
|
|
||||||
# entire python directory is linted.
|
|
||||||
elif [[ "$1" == '--all' ]]; then
|
|
||||||
isort_check_all
|
|
||||||
else
|
|
||||||
# Check spelling only of the files that changed in last commit.
|
|
||||||
isort_check_changed
|
|
||||||
fi
|
|
||||||
echo 'vllm-ascend isort: Done'
|
|
||||||
|
|
||||||
# Clang-format section
|
|
||||||
# Exclude some files for formatting because they are vendored
|
|
||||||
CLANG_FORMAT_EXCLUDES=(
|
|
||||||
'csrc/kernels/utils.h' 'csrc/kernels/pos_encoding_kernels.cpp' 'csrc/kernels/advance_step.cpp' 'csrc/kernels/get_masked_input_and_mask_kernel.cpp' 'csrc/torch_binding.cpp' 'csrc/ops.h'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Format specified files with clang-format
|
|
||||||
clang_format() {
|
|
||||||
clang-format -i "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Format files that differ from main branch with clang-format.
|
|
||||||
clang_format_changed() {
|
|
||||||
# The `if` guard ensures that the list of filenames is not empty, which
|
|
||||||
# could cause clang-format to receive 0 positional arguments, making it hang
|
|
||||||
# waiting for STDIN.
|
|
||||||
#
|
|
||||||
# `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that
|
|
||||||
# exist on both branches.
|
|
||||||
MERGEBASE="$(git merge-base origin/main HEAD)"
|
|
||||||
|
|
||||||
# Get the list of changed files, excluding the specified ones
|
|
||||||
changed_files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.h' '*.cpp' '*.cu' '*.cuh' | (grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") || echo -e))
|
|
||||||
if [ -n "$changed_files" ]; then
|
|
||||||
echo "$changed_files" | xargs -P 5 clang-format -i
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Format all files with clang-format
|
|
||||||
clang_format_all() {
|
|
||||||
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
|
|
||||||
| grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") \
|
|
||||||
| xargs clang-format -i
|
|
||||||
}
|
|
||||||
|
|
||||||
# Run clang-format
|
|
||||||
if [[ "$1" == '--files' ]]; then
|
|
||||||
clang_format "${@:2}"
|
|
||||||
elif [[ "$1" == '--all' ]]; then
|
|
||||||
clang_format_all
|
|
||||||
else
|
|
||||||
clang_format_changed
|
|
||||||
fi
|
|
||||||
echo 'vllm-ascend clang-format: Done'
|
|
||||||
|
|
||||||
echo 'vllm-ascend actionlint:'
|
|
||||||
tools/actionlint.sh -color
|
|
||||||
echo 'vllm-ascend actionlint: Done'
|
|
||||||
|
|
||||||
echo 'vllm-ascend shellcheck:'
|
|
||||||
tools/shellcheck.sh
|
|
||||||
echo 'vllm-ascend shellcheck: Done'
|
|
||||||
|
|
||||||
echo 'excalidraw png check:'
|
|
||||||
tools/png-lint.sh
|
|
||||||
echo 'excalidraw png check: Done'
|
|
||||||
|
|
||||||
if ! git diff --quiet &>/dev/null; then
|
|
||||||
echo
|
|
||||||
echo "🔍🔍There are files changed by the format checker or by you that are not added and committed:"
|
|
||||||
git --no-pager diff --name-only
|
|
||||||
echo "🔍🔍Format checker passed, but please add, commit and push all the files above to include changes made by the format checker."
|
|
||||||
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
echo "✨🎉 Format check passed! Congratulations! 🎉✨"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# echo 'vLLM sphinx-lint:'
|
|
||||||
# tools/sphinx-lint.sh
|
|
||||||
# echo 'vLLM sphinx-lint: Done'
|
|
||||||
|
|||||||
@@ -1,15 +1,8 @@
|
|||||||
# formatting
|
# formatting
|
||||||
yapf==0.32.0
|
pre-commit==4.0.1
|
||||||
toml==0.10.2
|
|
||||||
tomli==2.0.2
|
|
||||||
ruff==0.6.5
|
|
||||||
codespell==2.3.0
|
|
||||||
isort==5.13.2
|
|
||||||
clang-format==18.1.5
|
|
||||||
sphinx-lint==1.0.0
|
|
||||||
|
|
||||||
# type checking
|
# type checking
|
||||||
mypy==1.15.0
|
mypy==1.11.1
|
||||||
types-PyYAML
|
types-PyYAML
|
||||||
types-requests
|
types-requests
|
||||||
types-setuptools
|
types-setuptools
|
||||||
|
|||||||
@@ -324,8 +324,10 @@ def test_stop_via_update_from_output():
|
|||||||
|
|
||||||
model_output = ModelRunnerOutput(
|
model_output = ModelRunnerOutput(
|
||||||
req_ids=[req.request_id for req in requests],
|
req_ids=[req.request_id for req in requests],
|
||||||
req_id_to_index={req.request_id: i
|
req_id_to_index={
|
||||||
for i, req in enumerate(requests)},
|
req.request_id: i
|
||||||
|
for i, req in enumerate(requests)
|
||||||
|
},
|
||||||
sampled_token_ids=[[EOS_TOKEN_ID],
|
sampled_token_ids=[[EOS_TOKEN_ID],
|
||||||
[10,
|
[10,
|
||||||
11]], # First request hits EOS, second continues
|
11]], # First request hits EOS, second continues
|
||||||
@@ -374,8 +376,10 @@ def test_stop_via_update_from_output():
|
|||||||
|
|
||||||
model_output = ModelRunnerOutput(
|
model_output = ModelRunnerOutput(
|
||||||
req_ids=[req.request_id for req in requests],
|
req_ids=[req.request_id for req in requests],
|
||||||
req_id_to_index={req.request_id: i
|
req_id_to_index={
|
||||||
for i, req in enumerate(requests)},
|
req.request_id: i
|
||||||
|
for i, req in enumerate(requests)
|
||||||
|
},
|
||||||
sampled_token_ids=[[10, 42, 12],
|
sampled_token_ids=[[10, 42, 12],
|
||||||
[13, 14]], # First request hits stop token
|
[13, 14]], # First request hits stop token
|
||||||
spec_token_ids=None,
|
spec_token_ids=None,
|
||||||
@@ -422,8 +426,10 @@ def test_stop_via_update_from_output():
|
|||||||
|
|
||||||
model_output = ModelRunnerOutput(
|
model_output = ModelRunnerOutput(
|
||||||
req_ids=[req.request_id for req in requests],
|
req_ids=[req.request_id for req in requests],
|
||||||
req_id_to_index={req.request_id: i
|
req_id_to_index={
|
||||||
for i, req in enumerate(requests)},
|
req.request_id: i
|
||||||
|
for i, req in enumerate(requests)
|
||||||
|
},
|
||||||
sampled_token_ids=[[10, 11, 12],
|
sampled_token_ids=[[10, 11, 12],
|
||||||
[13]], # First request exceeds max_tokens
|
[13]], # First request exceeds max_tokens
|
||||||
spec_token_ids=None,
|
spec_token_ids=None,
|
||||||
|
|||||||
@@ -21,8 +21,8 @@ def get_masked_input_and_mask_ref(
|
|||||||
added_vocab_start_index: int,
|
added_vocab_start_index: int,
|
||||||
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
|
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||||
"""Reference implementation for verification"""
|
"""Reference implementation for verification"""
|
||||||
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ <
|
org_vocab_mask = (input_ >= org_vocab_start_index) & (
|
||||||
org_vocab_end_index)
|
input_ < org_vocab_end_index)
|
||||||
added_vocab_mask = (input_ >= added_vocab_start_index) & (
|
added_vocab_mask = (input_ >= added_vocab_start_index) & (
|
||||||
input_ < added_vocab_end_index)
|
input_ < added_vocab_end_index)
|
||||||
added_offset = added_vocab_start_index - (
|
added_offset = added_vocab_start_index - (
|
||||||
|
|||||||
@@ -394,8 +394,8 @@ def test_rejection_sampling_approximates_target_distribution():
|
|||||||
distance_wrt_reference)
|
distance_wrt_reference)
|
||||||
|
|
||||||
expected_improvement_multiplier = 20
|
expected_improvement_multiplier = 20
|
||||||
assert (relative_change_in_distance_wrt_target >
|
assert (relative_change_in_distance_wrt_target
|
||||||
relative_change_in_distance_wrt_reference *
|
> relative_change_in_distance_wrt_reference *
|
||||||
expected_improvement_multiplier)
|
expected_improvement_multiplier)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -231,8 +231,10 @@ def test_stop_via_update_from_output():
|
|||||||
|
|
||||||
model_output = ModelRunnerOutput(
|
model_output = ModelRunnerOutput(
|
||||||
req_ids=[req.request_id for req in requests],
|
req_ids=[req.request_id for req in requests],
|
||||||
req_id_to_index={req.request_id: i
|
req_id_to_index={
|
||||||
for i, req in enumerate(requests)},
|
req.request_id: i
|
||||||
|
for i, req in enumerate(requests)
|
||||||
|
},
|
||||||
sampled_token_ids=[[EOS_TOKEN_ID],
|
sampled_token_ids=[[EOS_TOKEN_ID],
|
||||||
[10,
|
[10,
|
||||||
11]], # First request hits EOS, second continues
|
11]], # First request hits EOS, second continues
|
||||||
@@ -279,8 +281,10 @@ def test_stop_via_update_from_output():
|
|||||||
|
|
||||||
model_output = ModelRunnerOutput(
|
model_output = ModelRunnerOutput(
|
||||||
req_ids=[req.request_id for req in requests],
|
req_ids=[req.request_id for req in requests],
|
||||||
req_id_to_index={req.request_id: i
|
req_id_to_index={
|
||||||
for i, req in enumerate(requests)},
|
req.request_id: i
|
||||||
|
for i, req in enumerate(requests)
|
||||||
|
},
|
||||||
sampled_token_ids=[[10, 42, 12],
|
sampled_token_ids=[[10, 42, 12],
|
||||||
[13, 14]], # First request hits stop token
|
[13, 14]], # First request hits stop token
|
||||||
spec_token_ids=None,
|
spec_token_ids=None,
|
||||||
@@ -325,8 +329,10 @@ def test_stop_via_update_from_output():
|
|||||||
|
|
||||||
model_output = ModelRunnerOutput(
|
model_output = ModelRunnerOutput(
|
||||||
req_ids=[req.request_id for req in requests],
|
req_ids=[req.request_id for req in requests],
|
||||||
req_id_to_index={req.request_id: i
|
req_id_to_index={
|
||||||
for i, req in enumerate(requests)},
|
req.request_id: i
|
||||||
|
for i, req in enumerate(requests)
|
||||||
|
},
|
||||||
sampled_token_ids=[[10, 11, 12],
|
sampled_token_ids=[[10, 11, 12],
|
||||||
[13]], # First request exceeds max_tokens
|
[13]], # First request exceeds max_tokens
|
||||||
spec_token_ids=None,
|
spec_token_ids=None,
|
||||||
|
|||||||
104
tools/enforce_regex_import.py
Normal file
104
tools/enforce_regex_import.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# Copyright 2023 The vLLM team.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
# Adapted from https://github.com/vllm-project/vllm/tree/main/tools
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import regex as re
|
||||||
|
|
||||||
|
# Matches a line that imports the standard-library ``re`` module, either as
# ``import re`` (followed by end of line, whitespace or a comma) or as
# ``from re import ...``.
# NOTE(review): ``re`` appearing later in a comma-separated import such as
# ``import os, re`` does not match this pattern -- confirm whether that form
# should be rejected as well.
FORBIDDEN_PATTERNS = re.compile(
    r'^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)')

# Import forms that are always accepted, even if the pattern above matched.
ALLOWED_PATTERNS = [
    re.compile(allowed_source) for allowed_source in (
        r'^\s*import\s+regex\s+as\s+re\s*$',
        r'^\s*import\s+regex\s*$',
    )
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_staged_python_files() -> list[str]:
    """Return the staged (added or modified) ``.py`` paths from the git index.

    Runs ``git diff --cached --name-only --diff-filter=AM`` in the current
    working directory.

    Returns:
        The paths ending in ``.py`` exactly as git prints them, or an empty
        list when nothing is staged or git exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            # -z makes git emit NUL-terminated, unquoted paths. Without it,
            # paths containing non-ASCII or special characters are C-quoted
            # (wrapped in double quotes with octal escapes), so the ``.py``
            # suffix test below would fail and the file would silently be
            # dropped from the check.
            ['git', 'diff', '--cached', '--name-only', '-z',
             '--diff-filter=AM'],
            capture_output=True,
            text=True,
            check=True)
    except subprocess.CalledProcessError:
        # Best effort: a failing git invocation (e.g. not inside a
        # repository) is treated as "nothing to check".
        return []
    # The empty string after the final NUL is discarded by the suffix test.
    return [f for f in result.stdout.split('\0') if f.endswith('.py')]
|
||||||
|
|
||||||
|
|
||||||
|
def is_forbidden_import(line: str) -> bool:
    """Tell whether ``line`` is an import of ``re`` that must be rejected.

    The line is stripped of surrounding whitespace first. It is forbidden
    when it matches ``FORBIDDEN_PATTERNS`` and none of ``ALLOWED_PATTERNS``.
    """
    stripped = line.strip()
    if FORBIDDEN_PATTERNS.match(stripped) is None:
        return False
    # An explicitly allowed form overrides the forbidden match.
    for allowed in ALLOWED_PATTERNS:
        if allowed.match(stripped):
            return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def check_file(filepath: str) -> list[tuple[int, str]]:
    """Scan one file for forbidden ``re`` imports.

    Args:
        filepath: Path of the file to read as UTF-8 text.

    Returns:
        ``(line_number, stripped_line)`` pairs, numbered from 1, for every
        line flagged by ``is_forbidden_import``. A file that cannot be opened
        or decoded yields whatever was collected before the error.
    """
    found = []
    try:
        with open(filepath, encoding='utf-8') as handle:
            for number, raw in enumerate(handle, start=1):
                if not is_forbidden_import(raw):
                    continue
                found.append((number, raw.strip()))
    except (OSError, UnicodeDecodeError):
        # Unreadable or non-UTF-8 files are skipped rather than failing the
        # whole check; matches found before the error are kept.
        pass
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Check every staged Python file and report forbidden ``re`` imports.

    Returns:
        ``1`` when at least one violation was printed, otherwise ``0``.
    """
    staged = get_staged_python_files()
    if not staged:
        return 0

    total_violations = 0
    for filepath in staged:
        # setup.py is exempt from the rule; paths that are not present in
        # the working tree are ignored.
        if filepath == "setup.py" or not Path(filepath).exists():
            continue

        violations = check_file(filepath)
        if not violations:
            continue

        print(f"\n❌ {filepath}:")
        for line_num, line in violations:
            print(f" Line {line_num}: {line}")
        total_violations += len(violations)

    if total_violations == 0:
        return 0

    # Summary followed by the remediation hint.
    print(f"\n💡 Found {total_violations} violation(s).")
    print("❌ Please replace 'import re' with 'import regex as re'")
    print(" Also replace 'from re import ...' with 'from regex import ...'"
          )  # noqa: E501
    print("✅ Allowed imports:")
    print(" - import regex as re")
    print(" - import regex")
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status: 1 when
    # violations were reported, 0 otherwise.
    raise SystemExit(main())
|
||||||
@@ -20,12 +20,16 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
CI=${1:-0}
|
CI=${1:-0}
|
||||||
PYTHON_VERSION=${2:-3.9}
|
PYTHON_VERSION=${2:-local}
|
||||||
|
|
||||||
if [ "$CI" -eq 1 ]; then
|
if [ "$CI" -eq 1 ]; then
|
||||||
set -e
|
set -e
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# "local" is a placeholder: resolve it to the major.minor version of the
# `python` found on PATH. The expansion is quoted so the test stays a single
# word, and `=` is the portable string comparison inside `[ ]`.
if [ "$PYTHON_VERSION" = "local" ]; then
    PYTHON_VERSION=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
fi
|
||||||
|
|
||||||
run_mypy() {
|
run_mypy() {
|
||||||
echo "Running mypy on $1"
|
echo "Running mypy on $1"
|
||||||
mypy --check-untyped-defs --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
|
mypy --check-untyped-defs --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
|
||||||
|
|||||||
@@ -39,3 +39,7 @@ if ! [ -x "$(command -v shellcheck)" ]; then
|
|||||||
PATH="$PATH:$(pwd)/shellcheck-${scversion}"
|
PATH="$PATH:$(pwd)/shellcheck-${scversion}"
|
||||||
export PATH
|
export PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# TODO: re-enable this check once the existing shell scripts pass shellcheck
|
||||||
|
# find . -path ./.git -prune -o -name "*.sh" -print0 \
|
||||||
|
# | xargs -0 -I {} sh -c 'git check-ignore -q "{}" || shellcheck -s bash "{}"'
|
||||||
|
|||||||
177
typos.toml
Normal file
177
typos.toml
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
[files]
|
||||||
|
# these files may contain non-English words
|
||||||
|
extend-exclude = []
|
||||||
|
ignore-hidden = true
|
||||||
|
ignore-files = true
|
||||||
|
ignore-dot = true
|
||||||
|
ignore-vcs = true
|
||||||
|
ignore-global = true
|
||||||
|
ignore-parent = true
|
||||||
|
|
||||||
|
[default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
# Identifiers matching any of these regexes are not spell-checked.
# "(UE4M3|ue4m3)" must be an alternation group, not a character class: a
# class such as "[UE4M3|ue4m3]" matches any identifier that merely contains
# one of those characters, which would exempt most identifiers.
# NOTE(review): tightening this pattern may surface findings that the broad
# match was hiding -- run typos over the repository after applying it.
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
    ".*UE8M0.*", ".*(UE4M3|ue4m3).*", ".*eles.*", ".*fo.*", ".*ba.*",
    ".*ot.*", ".*[Tt]h[rR].*"]
extend-ignore-words-re = ["CANN", "cann"]
extend-ignore-re = []
|
||||||
|
|
||||||
|
[default.extend-identifiers]
|
||||||
|
nd_to_nz_2d = "nd_to_nz_2d"
|
||||||
|
bbc5b7ede = "bbc5b7ede"
|
||||||
|
womens_doubles = "womens_doubles"
|
||||||
|
v_2nd = "v_2nd"
|
||||||
|
splitted_input = "splitted_input"
|
||||||
|
NOOPs = "NOOPs"
|
||||||
|
typ = "typ"
|
||||||
|
nin_shortcut = "nin_shortcut"
|
||||||
|
UperNetDecoder = "UperNetDecoder"
|
||||||
|
subtile = "subtile"
|
||||||
|
SFOuput = "SFOuput"
|
||||||
|
# huggingface transformers repo uses these words
|
||||||
|
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
|
||||||
|
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
|
||||||
|
depthwise_seperable_CNN = "depthwise_seperable_CNN"
|
||||||
|
|
||||||
|
[default.extend-words]
|
||||||
|
iy = "iy"
|
||||||
|
tendencias = "tendencias"
|
||||||
|
# intel cpu features
|
||||||
|
tme = "tme"
|
||||||
|
dout = "dout"
|
||||||
|
Pn = "Pn"
|
||||||
|
arange = "arange"
|
||||||
|
|
||||||
|
[type.py]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.py.extend-identifiers]
|
||||||
|
arange = "arange"
|
||||||
|
NDArray = "NDArray"
|
||||||
|
EOFError = "EOFError"
|
||||||
|
|
||||||
|
[type.py.extend-words]
|
||||||
|
|
||||||
|
[type.cpp]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.cpp.extend-identifiers]
|
||||||
|
countr_one = "countr_one"
|
||||||
|
|
||||||
|
[type.cpp.extend-words]
|
||||||
|
|
||||||
|
[type.rust]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.rust.extend-identifiers]
|
||||||
|
flate2 = "flate2"
|
||||||
|
|
||||||
|
[type.rust.extend-words]
|
||||||
|
ser = "ser"
|
||||||
|
|
||||||
|
[type.lock]
|
||||||
|
extend-glob = []
|
||||||
|
check-file = false
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.lock.extend-identifiers]
|
||||||
|
|
||||||
|
[type.lock.extend-words]
|
||||||
|
|
||||||
|
[type.jl]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.jl.extend-identifiers]
|
||||||
|
|
||||||
|
[type.jl.extend-words]
|
||||||
|
modul = "modul"
|
||||||
|
egals = "egals"
|
||||||
|
usig = "usig"
|
||||||
|
egal = "egal"
|
||||||
|
|
||||||
|
[type.go]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.go.extend-identifiers]
|
||||||
|
flate = "flate"
|
||||||
|
|
||||||
|
[type.go.extend-words]
|
||||||
|
|
||||||
|
[type.css]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.css.extend-identifiers]
|
||||||
|
nd = "nd"
|
||||||
|
|
||||||
|
[type.css.extend-words]
|
||||||
|
|
||||||
|
[type.man]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.man.extend-identifiers]
|
||||||
|
Nd = "Nd"
|
||||||
|
|
||||||
|
[type.man.extend-words]
|
||||||
|
|
||||||
|
[type.cert]
|
||||||
|
extend-glob = []
|
||||||
|
check-file = false
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.cert.extend-identifiers]
|
||||||
|
|
||||||
|
[type.cert.extend-words]
|
||||||
|
|
||||||
|
[type.sh]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.sh.extend-identifiers]
|
||||||
|
stap = "stap"
|
||||||
|
ot = "ot"
|
||||||
|
|
||||||
|
[type.sh.extend-words]
|
||||||
|
|
||||||
|
[type.vimscript]
|
||||||
|
extend-glob = []
|
||||||
|
extend-ignore-identifiers-re = []
|
||||||
|
extend-ignore-words-re = []
|
||||||
|
extend-ignore-re = []
|
||||||
|
|
||||||
|
[type.vimscript.extend-identifiers]
|
||||||
|
windo = "windo"
|
||||||
|
|
||||||
|
[type.vimscript.extend-words]
|
||||||
@@ -232,7 +232,7 @@ class AscendScheduler(Scheduler):
|
|||||||
token_budget -= num_new_tokens
|
token_budget -= num_new_tokens
|
||||||
request.status = RequestStatus.RUNNING
|
request.status = RequestStatus.RUNNING
|
||||||
request.num_computed_tokens = num_computed_tokens
|
request.num_computed_tokens = num_computed_tokens
|
||||||
# Count the number of prifix cached tokens.
|
# Count the number of prefix cached tokens.
|
||||||
if request.num_cached_tokens < 0:
|
if request.num_cached_tokens < 0:
|
||||||
request.num_cached_tokens = num_computed_tokens
|
request.num_cached_tokens = num_computed_tokens
|
||||||
|
|
||||||
|
|||||||
@@ -199,8 +199,11 @@ class SimpleConnector(KVConnectorBase):
|
|||||||
model_executable: torch.nn.Module,
|
model_executable: torch.nn.Module,
|
||||||
model_input: "ModelInputForGPUWithSamplingMetadata",
|
model_input: "ModelInputForGPUWithSamplingMetadata",
|
||||||
kv_caches: List[torch.Tensor],
|
kv_caches: List[torch.Tensor],
|
||||||
) -> Tuple[Union[torch.Tensor, IntermediateTensors], bool,
|
) -> Tuple[
|
||||||
"ModelInputForGPUWithSamplingMetadata", ]:
|
Union[torch.Tensor, IntermediateTensors],
|
||||||
|
bool,
|
||||||
|
"ModelInputForGPUWithSamplingMetadata",
|
||||||
|
]:
|
||||||
bypass_model_exec = True
|
bypass_model_exec = True
|
||||||
|
|
||||||
model_config = self.model_config
|
model_config = self.model_config
|
||||||
|
|||||||
@@ -108,7 +108,8 @@ class CustomDeepSeekMultiTokenPredictor(DeepSeekMultiTokenPredictor):
|
|||||||
self.num_mtp_layers = config.num_nextn_predict_layers
|
self.num_mtp_layers = config.num_nextn_predict_layers
|
||||||
# to map the exact layer index from weights
|
# to map the exact layer index from weights
|
||||||
self.layers = torch.nn.ModuleDict({
|
self.layers = torch.nn.ModuleDict({
|
||||||
str(idx): CustomDeepSeekMultiTokenPredictorLayer(
|
str(idx):
|
||||||
|
CustomDeepSeekMultiTokenPredictorLayer(
|
||||||
config,
|
config,
|
||||||
f"{prefix}.layers.{idx}",
|
f"{prefix}.layers.{idx}",
|
||||||
model_config=vllm_config.model_config,
|
model_config=vllm_config.model_config,
|
||||||
|
|||||||
@@ -79,8 +79,9 @@ def process_topk_ids(topk_ids: torch.Tensor, expert_num: int, ep_size: int,
|
|||||||
experts_per_ep_rank_val).to(original_dtype)
|
experts_per_ep_rank_val).to(original_dtype)
|
||||||
indices_arange = torch.arange(topk_ids.shape[0], device=device)
|
indices_arange = torch.arange(topk_ids.shape[0], device=device)
|
||||||
|
|
||||||
is_new_segment = torch.cat((torch.tensor([True], device=device),
|
is_new_segment = torch.cat(
|
||||||
assigned_ep_rank[1:] != assigned_ep_rank[:-1]))
|
(torch.tensor([True], device=device), assigned_ep_rank[1:]
|
||||||
|
!= assigned_ep_rank[:-1]))
|
||||||
temp_start_markers = torch.full_like(indices_arange,
|
temp_start_markers = torch.full_like(indices_arange,
|
||||||
-1,
|
-1,
|
||||||
dtype=indices_arange.dtype)
|
dtype=indices_arange.dtype)
|
||||||
@@ -469,13 +470,13 @@ def fused_experts_with_all2all_buffer(
|
|||||||
expert_idx_buffer_scatter.shape,
|
expert_idx_buffer_scatter.shape,
|
||||||
dtype=expert_idx_buffer_scatter.dtype,
|
dtype=expert_idx_buffer_scatter.dtype,
|
||||||
device=expert_idx_buffer_scatter.device)
|
device=expert_idx_buffer_scatter.device)
|
||||||
non_pad_len = torch.sum(
|
non_pad_len = torch.sum((expert_idx_buffer_scatter
|
||||||
(expert_idx_buffer_scatter != global_num_experts).to(torch.int32))
|
!= global_num_experts).to(torch.int32))
|
||||||
hidden_states_pad_idx[
|
hidden_states_pad_idx[expert_idx_buffer_scatter !=
|
||||||
expert_idx_buffer_scatter != global_num_experts] = torch.arange(
|
global_num_experts] = torch.arange(
|
||||||
non_pad_len,
|
non_pad_len,
|
||||||
dtype=expert_idx_buffer_scatter.dtype,
|
dtype=expert_idx_buffer_scatter.dtype,
|
||||||
device=hidden_states.device)
|
device=hidden_states.device)
|
||||||
|
|
||||||
hidden_states_buffer_scatter = hidden_states[hidden_states_pad_idx]
|
hidden_states_buffer_scatter = hidden_states[hidden_states_pad_idx]
|
||||||
expert_idx_buffer_gather = torch.empty_like(
|
expert_idx_buffer_gather = torch.empty_like(
|
||||||
@@ -528,8 +529,8 @@ def fused_experts_with_all2all_buffer(
|
|||||||
dist.all_to_all_single(hidden_states_gatter,
|
dist.all_to_all_single(hidden_states_gatter,
|
||||||
hidden_states_scatter,
|
hidden_states_scatter,
|
||||||
group=ep_group.device_group)
|
group=ep_group.device_group)
|
||||||
hidden_states_gatter = hidden_states_gatter[
|
hidden_states_gatter = hidden_states_gatter[expert_idx_buffer_scatter !=
|
||||||
expert_idx_buffer_scatter != global_num_experts]
|
global_num_experts]
|
||||||
if hidden_states_gatter.shape[0] != row_idx_len:
|
if hidden_states_gatter.shape[0] != row_idx_len:
|
||||||
hidden_states = torch.zeros((row_idx_len, hidden_states.shape[1]),
|
hidden_states = torch.zeros((row_idx_len, hidden_states.shape[1]),
|
||||||
dtype=hidden_states.dtype,
|
dtype=hidden_states.dtype,
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ def get_masked_input_and_mask(
|
|||||||
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
|
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||||
# torch.compile will fuse all of the pointwise ops below
|
# torch.compile will fuse all of the pointwise ops below
|
||||||
# into a single kernel, making it very fast
|
# into a single kernel, making it very fast
|
||||||
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ <
|
org_vocab_mask = (input_ >= org_vocab_start_index) & (
|
||||||
org_vocab_end_index)
|
input_ < org_vocab_end_index)
|
||||||
added_vocab_mask = (input_ >= added_vocab_start_index) & (
|
added_vocab_mask = (input_ >= added_vocab_start_index) & (
|
||||||
input_ < added_vocab_end_index)
|
input_ < added_vocab_end_index)
|
||||||
added_offset = added_vocab_start_index - (
|
added_offset = added_vocab_start_index - (
|
||||||
|
|||||||
@@ -880,8 +880,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
assert total_num_scheduled_tokens > 0
|
assert total_num_scheduled_tokens > 0
|
||||||
num_reqs = self.input_batch.num_reqs
|
num_reqs = self.input_batch.num_reqs
|
||||||
assert num_reqs > 0
|
assert num_reqs > 0
|
||||||
if (self.use_aclgraph and
|
if (self.use_aclgraph and total_num_scheduled_tokens
|
||||||
total_num_scheduled_tokens <= self.aclgraph_batch_sizes[-1]):
|
<= self.aclgraph_batch_sizes[-1]):
|
||||||
# Add padding to the batch size.
|
# Add padding to the batch size.
|
||||||
num_input_tokens = self.vllm_config.pad_for_cudagraph(
|
num_input_tokens = self.vllm_config.pad_for_cudagraph(
|
||||||
total_num_scheduled_tokens)
|
total_num_scheduled_tokens)
|
||||||
|
|||||||
Reference in New Issue
Block a user