[Test] Remove VLLM_USE_V1 in example and tests (#1733)

V1 is enabled by default, so there is no need to set it by hand anymore. This PR removes the now-redundant VLLM_USE_V1 setting from the examples and tests. - vLLM version: v0.9.2 - vLLM main: 9ad0a4588b Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-07-15 12:49:57 +08:00
parent eb921d2b6f
commit 787010a637
29 changed files with 186 additions and 291 deletions
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -41,16 +41,10 @@ concurrency:

 jobs:
  lint:
-    # Only trigger lint on pull request
-    if: ${{ github.event_name == 'pull_request' }}
    uses: ./.github/workflows/pre-commit.yml

  changes:
-    # Only trigger changes on pull request
-    if: ${{ github.event_name == 'pull_request' }}
    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: read
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -60,20 +54,24 @@ jobs:
      with:
        filters: |
          e2e_tracker:
+            - '.github/workflows/vllm_ascend_test.yaml'
            - 'vllm_ascend/**'
            - 'csrc/**'
            - 'cmake/**'
            - 'tests/e2e/**'
-            - 'tests/conftest.py'
-            - 'tests/model_utils.py'
-            - 'tests/utils.py'
+            - 'CMakeLists.txt'
+            - 'setup.py'
+            - 'requirements.txt'
+            - 'requirements-dev.txt'
+            - 'requirements-lint.txt'
+            - 'packages.txt'
          ut_tracker:
            - 'tests/ut/**'
  ut:
    needs: [lint, changes]
    name: unit test
-    # only trigger unit test after lint passed and the change is e2e and ut related. Or the PR is merged.
-    if: ${{ github.event_name == 'push' || (needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true')) }}
+    # only trigger unit test after lint passed and the change is e2e or ut related.
+    if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
    runs-on: ubuntu-latest
    container:
      image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
@@ -112,9 +110,8 @@ jobs:
          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/

-      - name: Run unit test for V1 Engine
+      - name: Run unit test
        env:
-          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
        run: |
@@ -133,8 +130,8 @@ jobs:

  e2e:
    needs: [lint, changes]
-    # only trigger e2e test after lint passed and the change is e2e related.
-    if: ${{ needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
+    # only trigger e2e test on pull requests, after lint passed and the change is e2e related.
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
    strategy:
      max-parallel: 2
      matrix:
@@ -189,9 +186,8 @@ jobs:
          pip install -r requirements-dev.txt
          pip install -v -e .

-      - name: Run e2e test for V1 Engine
+      - name: Run e2e test
        env:
-          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
@@ -213,26 +209,6 @@ jobs:
          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

-      - name: Run e2e test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py \
-            --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
-            --ignore=tests/e2e/singlecard/test_embedding.py
-
  e2e-4-cards:
    needs: [e2e]
    if: ${{ needs.e2e.result == 'success' }}
@@ -290,9 +266,8 @@ jobs:
          pip install -r requirements-dev.txt
          pip install -v -e .

-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run vllm-project/vllm-ascend test
        env:
-          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
@@ -308,19 +283,3 @@ jobs:
          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
            --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
            --ignore=tests/e2e/multicard/test_data_parallel.py
-
-      - name: Run vllm-project/vllm-ascend test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
-            --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
-            --ignore=tests/e2e/multicard/test_data_parallel.py