[Bugfix] Fix DeepSeek precision issue and add accuracy CI for it (#905)

### What this PR does / why we need it? Fix the DeepSeek precision issue on V0 and add an accuracy CI test for it. Fixes https://github.com/vllm-project/vllm-ascend/issues/1062 ### How was this patch tested? CI passed with the newly added test. Signed-off-by: MengqingCao <cmq0113@163.com>
2025-06-04 20:26:44 +08:00
parent da9acfca60
commit afc4c0cd03
9 changed files with 121 additions and 43 deletions
--- a/.github/workflows/vllm_ascend_test_long_term.yaml
+++ b/.github/workflows/vllm_ascend_test_long_term.yaml
@@ -41,9 +41,19 @@ jobs:
    strategy:
      max-parallel: 2
      matrix:
+        os: [linux-arm64-npu-1, linux-arm64-npu-4]
        vllm_version: [main, v0.9.0]
+    concurrency:
+      group: >
+        ${{
+        matrix.os == 'linux-arm64-npu-4'
+          && github.event.pull_request.number
+          && format('pr-{0}-limit-npu-4-long-term', github.event.pull_request.number)
+        || format('job-{0}-{1}-{2}-long-term', matrix.os, matrix.vllm_version, github.event.pull_request.number)
+        }}
+      cancel-in-progress: false
    name: vLLM Ascend long term test
-    runs-on: linux-arm64-npu-1
+    runs-on: ${{ matrix.os }}
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
@@ -92,8 +102,13 @@ jobs:

      - name: Run vllm-project/vllm-ascend long term test
        run: |
-          # spec decode test
-          VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
-          VLLM_USE_MODELSCOPE=true pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
-          VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py  # it needs a clean process
-          pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
+            # spec decode test
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py  # it needs a clean process
+            pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            pytest -sv tests/long_term/test_accuracy.py
+          else
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
+          fi