[Test] Remove VLLM_USE_V1 in example and tests (#1733)

V1 is enabled by default, so there is no need to set it by hand anymore. This PR
removes the now-redundant VLLM_USE_V1 setting from the examples and tests.

- vLLM version: v0.9.2
- vLLM main: 9ad0a4588b

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-15 12:49:57 +08:00
committed by GitHub
parent eb921d2b6f
commit 787010a637
29 changed files with 186 additions and 291 deletions

View File

@@ -41,16 +41,10 @@ concurrency:
jobs:
lint:
# Only trigger lint on pull request
if: ${{ github.event_name == 'pull_request' }}
uses: ./.github/workflows/pre-commit.yml
changes:
# Only trigger changes on pull request
if: ${{ github.event_name == 'pull_request' }}
runs-on: ubuntu-latest
permissions:
pull-requests: read
outputs:
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -60,20 +54,24 @@ jobs:
with:
filters: |
e2e_tracker:
- '.github/workflows/vllm_ascend_test.yaml'
- 'vllm_ascend/**'
- 'csrc/**'
- 'cmake/**'
- 'tests/e2e/**'
- 'tests/conftest.py'
- 'tests/model_utils.py'
- 'tests/utils.py'
- 'CMakeLists.txt'
- 'setup.py'
- 'requirements.txt'
- 'requirements-dev.txt'
- 'requirements-lint.txt'
- 'packages.txt'
ut_tracker:
- 'tests/ut/**'
ut:
needs: [lint, changes]
name: unit test
# only trigger unit test after lint passed and the change is e2e and ut related. Or the PR is merged.
if: ${{ github.event_name == 'push' || (needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true')) }}
# only trigger unit test after lint passed and the change is e2e and ut related.
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
runs-on: ubuntu-latest
container:
image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
@@ -112,9 +110,8 @@ jobs:
python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
- name: Run unit test for V1 Engine
- name: Run unit test
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
run: |
@@ -133,8 +130,8 @@ jobs:
e2e:
needs: [lint, changes]
# only trigger e2e test after lint passed and the change is e2e related.
if: ${{ needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
# only trigger e2e test after lint passed and the change is e2e related with pull request.
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
strategy:
max-parallel: 2
matrix:
@@ -189,9 +186,8 @@ jobs:
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run e2e test for V1 Engine
- name: Run e2e test
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
@@ -213,26 +209,6 @@ jobs:
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
- name: Run e2e test on V0 engine
if: ${{ github.event_name == 'schedule' }}
env:
VLLM_USE_V1: 0
VLLM_USE_MODELSCOPE: True
run: |
pytest -sv tests/e2e/singlecard/test_offline_inference.py
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
pytest -sv tests/e2e/singlecard/test_camem.py
pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
pytest -sv tests/e2e/singlecard/test_embedding.py
pytest -sv tests/e2e/singlecard/ \
--ignore=tests/e2e/singlecard/test_offline_inference.py \
--ignore=tests/e2e/singlecard/test_ilama_lora.py \
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
--ignore=tests/e2e/singlecard/test_camem.py \
--ignore=tests/e2e/singlecard/test_prompt_embedding.py \
--ignore=tests/e2e/singlecard/test_embedding.py
e2e-4-cards:
needs: [e2e]
if: ${{ needs.e2e.result == 'success' }}
@@ -290,9 +266,8 @@ jobs:
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test for V1 Engine
- name: Run vllm-project/vllm-ascend test
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
@@ -308,19 +283,3 @@ jobs:
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
--ignore=tests/e2e/multicard/test_data_parallel.py
- name: Run vllm-project/vllm-ascend test on V0 engine
if: ${{ github.event_name == 'schedule' }}
env:
VLLM_USE_V1: 0
VLLM_USE_MODELSCOPE: True
run: |
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
# To avoid oom, we need to run the test in a single process.
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
pytest -sv tests/e2e/multicard/test_data_parallel.py
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
--ignore=tests/e2e/multicard/test_data_parallel.py