[DP] Tiny fix of dp and update example (#1273)
### What this PR does / why we need it? Add `max_num_tokens_across_dp` to AscendMetadata to fix data-parallel (DP) execution. This PR fixes the bug introduced by https://github.com/vllm-project/vllm-ascend/pull/1229, which added the argument `max_num_tokens_across_dp` when dp_size > 1. Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
10
.github/workflows/vllm_ascend_test.yaml
vendored
10
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -363,7 +363,10 @@ jobs:
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
|
||||
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
|
||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
|
||||
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
|
||||
--ignore=tests/e2e/multicard/test_data_parallel.py
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test on V0 engine
|
||||
if: ${{ github.event_name == 'schedule' }}
|
||||
@@ -380,4 +383,7 @@ jobs:
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
|
||||
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
|
||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
|
||||
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
|
||||
--ignore=tests/e2e/multicard/test_data_parallel.py
|
||||
|
||||
Reference in New Issue
Block a user