[CI/UT][Refactor] move e2e spec decode and deepseek acc test to per pr (#1136)
### What this PR does / why we need it?
1. run deepseek acc ut per pr --- multicard CI time increased by 9 min
2. run spec decode e2e test on v1 per pr --- singlecard CI time
increased by 3 min (part of it is disabled because it does not work for now)
~~3. align the output of whether dbo is enabled or not~~
The generated results with and without dbo cannot be aligned.
https://github.com/vllm-project/vllm-ascend/actions/runs/15822900528/job/44600029405?pr=1136
4. skip the V0 mtp test due to the failure in
https://github.com/vllm-project/vllm-ascend/actions/runs/16012172833/job/45171988816
5. fix some version conflicts
### How was this patch tested?
CI passed with the newly added tests.
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
12
.github/workflows/vllm_ascend_test.yaml
vendored
12
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -267,7 +267,13 @@ jobs:
|
||||
--ignore=tests/e2e/singlecard/test_ilama_lora.py \
|
||||
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
|
||||
--ignore=tests/e2e/singlecard/test_camem.py \
|
||||
--ignore=tests/e2e/singlecard/test_embedding.py
|
||||
--ignore=tests/e2e/singlecard/test_embedding.py \
|
||||
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
|
||||
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
||||
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
||||
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
||||
|
||||
- name: Run e2e test on V0 engine
|
||||
if: ${{ github.event_name == 'schedule' }}
|
||||
@@ -287,8 +293,6 @@ jobs:
|
||||
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
|
||||
--ignore=tests/e2e/singlecard/test_camem.py \
|
||||
--ignore=tests/e2e/singlecard/test_prompt_embedding.py \
|
||||
--ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
|
||||
--ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py \
|
||||
--ignore=tests/e2e/singlecard/test_embedding.py
|
||||
|
||||
e2e-4-cards:
|
||||
@@ -359,7 +363,6 @@ jobs:
|
||||
# To avoid oom, we need to run the test in a single process.
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
|
||||
@@ -379,7 +382,6 @@ jobs:
|
||||
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
|
||||
# To avoid oom, we need to run the test in a single process.
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
|
||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||
|
||||
@@ -97,12 +97,9 @@ jobs:
|
||||
run: |
|
||||
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
|
||||
# v0 spec decode test
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v0/e2e/test_mtp_correctness.py # it needs a clean process
|
||||
# TODO: Revert me when test_mtp_correctness is fixed
|
||||
# VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v0/e2e/test_mtp_correctness.py # it needs a clean process
|
||||
pytest -sv tests/e2e/long_term/spec_decode_v0 --ignore=tests/e2e/long_term/spec_decode_v0/e2e/test_mtp_correctness.py
|
||||
# v1 spec decode test
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v1/test_v1_mtp_correctness.py
|
||||
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
|
||||
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v1/test_v1_spec_decode.py
|
||||
# accuracy test single card
|
||||
pytest -sv tests/e2e/long_term/test_accuracy.py
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user