[Test] Remove VLLM_USE_V1 in example and tests (#1733)
V1 is enabled by default, no need to set it by hand now. This PR remove
the useless setting in example and tests
- vLLM version: v0.9.2
- vLLM main:
9ad0a4588b
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
69
.github/workflows/vllm_ascend_test.yaml
vendored
69
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -41,16 +41,10 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
# Only trigger lint on pull request
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
uses: ./.github/workflows/pre-commit.yml
|
||||
|
||||
changes:
|
||||
# Only trigger changes on pull request
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
outputs:
|
||||
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
||||
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
|
||||
@@ -60,20 +54,24 @@ jobs:
|
||||
with:
|
||||
filters: |
|
||||
e2e_tracker:
|
||||
- '.github/workflows/vllm_ascend_test.yaml'
|
||||
- 'vllm_ascend/**'
|
||||
- 'csrc/**'
|
||||
- 'cmake/**'
|
||||
- 'tests/e2e/**'
|
||||
- 'tests/conftest.py'
|
||||
- 'tests/model_utils.py'
|
||||
- 'tests/utils.py'
|
||||
- 'CMakeLists.txt'
|
||||
- 'setup.py'
|
||||
- 'requirements.txt'
|
||||
- 'requirements-dev.txt'
|
||||
- 'requirements-lint.txt'
|
||||
- 'packages.txt'
|
||||
ut_tracker:
|
||||
- 'tests/ut/**'
|
||||
ut:
|
||||
needs: [lint, changes]
|
||||
name: unit test
|
||||
# only trigger unit test after lint passed and the change is e2e and ut related. Or the PR is merged.
|
||||
if: ${{ github.event_name == 'push' || (needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true')) }}
|
||||
# only trigger unit test after lint passed and the change is e2e and ut related.
|
||||
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
||||
@@ -112,9 +110,8 @@ jobs:
|
||||
python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
|
||||
python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
|
||||
|
||||
- name: Run unit test for V1 Engine
|
||||
- name: Run unit test
|
||||
env:
|
||||
VLLM_USE_V1: 1
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
|
||||
run: |
|
||||
@@ -133,8 +130,8 @@ jobs:
|
||||
|
||||
e2e:
|
||||
needs: [lint, changes]
|
||||
# only trigger e2e test after lint passed and the change is e2e related.
|
||||
if: ${{ needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
|
||||
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
||||
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
|
||||
strategy:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
@@ -189,9 +186,8 @@ jobs:
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Run e2e test for V1 Engine
|
||||
- name: Run e2e test
|
||||
env:
|
||||
VLLM_USE_V1: 1
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
run: |
|
||||
@@ -213,26 +209,6 @@ jobs:
|
||||
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
||||
|
||||
- name: Run e2e test on V0 engine
|
||||
if: ${{ github.event_name == 'schedule' }}
|
||||
env:
|
||||
VLLM_USE_V1: 0
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
run: |
|
||||
pytest -sv tests/e2e/singlecard/test_offline_inference.py
|
||||
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
||||
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
||||
pytest -sv tests/e2e/singlecard/test_camem.py
|
||||
pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
|
||||
pytest -sv tests/e2e/singlecard/test_embedding.py
|
||||
pytest -sv tests/e2e/singlecard/ \
|
||||
--ignore=tests/e2e/singlecard/test_offline_inference.py \
|
||||
--ignore=tests/e2e/singlecard/test_ilama_lora.py \
|
||||
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
|
||||
--ignore=tests/e2e/singlecard/test_camem.py \
|
||||
--ignore=tests/e2e/singlecard/test_prompt_embedding.py \
|
||||
--ignore=tests/e2e/singlecard/test_embedding.py
|
||||
|
||||
e2e-4-cards:
|
||||
needs: [e2e]
|
||||
if: ${{ needs.e2e.result == 'success' }}
|
||||
@@ -290,9 +266,8 @@ jobs:
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test for V1 Engine
|
||||
- name: Run vllm-project/vllm-ascend test
|
||||
env:
|
||||
VLLM_USE_V1: 1
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
run: |
|
||||
@@ -308,19 +283,3 @@ jobs:
|
||||
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
|
||||
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
|
||||
--ignore=tests/e2e/multicard/test_data_parallel.py
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test on V0 engine
|
||||
if: ${{ github.event_name == 'schedule' }}
|
||||
env:
|
||||
VLLM_USE_V1: 0
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
run: |
|
||||
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
|
||||
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
|
||||
# To avoid oom, we need to run the test in a single process.
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
|
||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
|
||||
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
|
||||
--ignore=tests/e2e/multicard/test_data_parallel.py
|
||||
|
||||
Reference in New Issue
Block a user