From 35cb7b523488f02fc8b5c520e19c1dcfbee9d176 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Fri, 7 Mar 2025 09:47:13 +0800 Subject: [PATCH] [CI] Add dispatch job to leverage dynamic devices (#251) ### What this PR does / why we need it? Add a dispatch job to dispatch jobs to dynamic devices; it includes 2 stages as below: The dispatch job will spend extra about `10s * parallel number + 30s` time waiting for other jobs to launch containers and release the lock. - **Stage 1: Acquire lock** add a dispatch job, this job uses lockfile to acquire locks and then get the device number dynamically - **Stage 2.1: Launch container with dynamic device** pass the device number via output and start the container job with the dynamic device - **Stage 2.2: Release lock** once the job started, release the lock. In the backend, we use multiple paths to set up multiple self-hosted runners as a load balancer: ``` $ pwd /home/action $ ll | grep actions drwx------ 6 action action 4096 Mar 7 08:55 actions-runner-01 drwx------ 6 action action 4096 Mar 7 08:55 actions-runner-02 drwx------ 6 action action 4096 Mar 7 08:55 actions-runner-03 drwx------ 6 action action 4096 Mar 7 08:56 actions-runner-04 drwx------ 4 action action 4096 Jan 24 22:08 actions-runner-05 drwx------ 4 action action 4096 Jan 24 22:08 actions-runner-06 ``` ``` adduser -G docker action su action pip3 install docker prettytable sudo yum install procmail ``` ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? - CI passed - E2E tested manually, triggered 3 jobs in parallel: - [1st job](https://github.com/vllm-project/vllm-ascend/actions/runs/13711345757/job/38348309297) dispatch to /dev/davinci2. 
- [2nd job](https://github.com/vllm-project/vllm-ascend/actions/runs/13711348739/job/38348316250) dispatch to /dev/davinci3 - [3rd job](https://github.com/vllm-project/vllm-ascend/actions/runs/13711351493/job/38348324551) dispatch to /dev/davinci4 Signed-off-by: Yikun Jiang --- .github/workflows/vllm_ascend_test.yaml | 31 ++++++++++++++++++++++--- tests/test_offline_inference.py | 1 + 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 832ec65..ea3d673 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -47,9 +47,30 @@ defaults: shell: bash -el {0} jobs: + dispatch: + name: vLLM Ascend test (dispatch) + runs-on: ascend-ci-arm64 + outputs: + number: ${{ steps.dispatch-device.outputs.number }} + steps: + - name: vLLM Ascend test (dispatch) + id: dispatch-device + run: | + # Try to acquire lock to dispatch devices + lockfile /tmp/dispatch.lock + + # Print npu info + npu-list /dev/null 2>&1 + + # Select first available device (Skip reserved davinci1 and davinci0) + NUMBER=$(npu-list /dev/null 2>&1 | grep None | grep -v davinci1 | grep -v davinci0 | head -1 | cut -b 15) + echo "Dispatch to /dev/davinci$NUMBER" + echo "number=$NUMBER" >> $GITHUB_OUTPUT + test: + needs: [dispatch] name: vLLM Ascend test (self-host) - runs-on: ascend-arm64 # actionlint-ignore: runner-label + runs-on: ascend-ci-arm64 # actionlint-ignore: runner-label container: image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10 @@ -58,9 +79,11 @@ jobs: - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ # Use self-host cache speed up pip and model download - - /home/action/actions-runner/_work/cache:/github/home/.cache/ + - /home/action/cache:/github/home/.cache/ + # for dispatch lock + - /tmp/:/tmp/ options: >- - --device /dev/davinci6 + --device /dev/davinci${{ needs.dispatch.outputs.number 
}} --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc @@ -71,6 +94,8 @@ jobs: run: | npu-smi info cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # unlock + rm -rf /tmp/dispatch.lock - name: Config mirrors run: | diff --git a/tests/test_offline_inference.py b/tests/test_offline_inference.py index 484bce6..6ad5c96 100644 --- a/tests/test_offline_inference.py +++ b/tests/test_offline_inference.py @@ -32,6 +32,7 @@ MODELS = [ "Qwen/Qwen2.5-0.5B-Instruct", ] os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256" TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")