[Bugfix] Fix deepseek percision issue and add acc ci for it (#905)
### What this PR does / why we need it? Fix deepseek percision issue on V0 and add acc ci for it Fixes https://github.com/vllm-project/vllm-ascend/issues/1062 ### How was this patch tested? CI passed with new added test. Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -41,9 +41,19 @@ jobs:
|
||||
strategy:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-arm64-npu-1, linux-arm64-npu-4]
|
||||
vllm_version: [main, v0.9.0]
|
||||
concurrency:
|
||||
group: >
|
||||
${{
|
||||
matrix.os == 'linux-arm64-npu-4'
|
||||
&& github.event.pull_request.number
|
||||
&& format('pr-{0}-limit-npu-4-long-term', github.event.pull_request.number)
|
||||
|| format('job-{0}-{1}-{2}-long-term', matrix.os, matrix.vllm_version, github.event.pull_request.number)
|
||||
}}
|
||||
cancel-in-progress: false
|
||||
name: vLLM Ascend long term test
|
||||
runs-on: linux-arm64-npu-1
|
||||
runs-on: ${{ matrix.os }}
|
||||
container:
|
||||
# TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
|
||||
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
||||
@@ -92,8 +102,13 @@ jobs:
|
||||
|
||||
- name: Run vllm-project/vllm-ascend long term test
|
||||
run: |
|
||||
# spec decode test
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
|
||||
VLLM_USE_MODELSCOPE=true pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
|
||||
pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
|
||||
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
|
||||
# spec decode test
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
|
||||
pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
|
||||
pytest -sv tests/long_term/test_accuracy.py
|
||||
else
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user