From 35cb7b523488f02fc8b5c520e19c1dcfbee9d176 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Fri, 7 Mar 2025 09:47:13 +0800 Subject: [PATCH] [CI] Add dispatch job to leverage dynamic devices (#251) ### What this PR does / why we need it? Add a dispatch job to dispatch jobs to dynamic devices; it includes 2 stages as below: The dispatch job will spend extra about `10s * parallel number + 30s` time waiting for other jobs to launch containers and release the lock. - **Stage 1: Acquire lock** add a dispatch job, this job uses lockfile to acquire locks and then get the device number dynamically - **Stage 2.1: Launch container with dynamic device** pass the device number via output and start the container job with the dynamic device - **Stage 2.2: Release lock** once the job started, release the lock. In the backend, we use multiple paths to set up multiple self-hosted runners as a load balancer: ``` $ pwd /home/action $ ll | grep actions drwx------ 6 action action 4096 Mar 7 08:55 actions-runner-01 drwx------ 6 action action 4096 Mar 7 08:55 actions-runner-02 drwx------ 6 action action 4096 Mar 7 08:55 actions-runner-03 drwx------ 6 action action 4096 Mar 7 08:56 actions-runner-04 drwx------ 4 action action 4096 Jan 24 22:08 actions-runner-05 drwx------ 4 action action 4096 Jan 24 22:08 actions-runner-06 ``` ``` adduser -G docker action su action pip3 install docker prettytable sudo yum install procmail ``` ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? - CI passed - E2E tested manually, triggered 3 jobs in parallel: - [1st job](https://github.com/vllm-project/vllm-ascend/actions/runs/13711345757/job/38348309297) dispatch to /dev/davinci2. 
- [2nd job](https://github.com/vllm-project/vllm-ascend/actions/runs/13711348739/job/38348316250) dispatch to /dev/davinci3 - [3rd job](https://github.com/vllm-project/vllm-ascend/actions/runs/13711351493/job/38348324551) dispatch to /dev/davinci4 Signed-off-by: Yikun Jiang --- .github/workflows/vllm_ascend_test.yaml | 31 ++++++++++++++++++++++--- tests/test_offline_inference.py | 1 + 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 832ec65..ea3d673 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -47,9 +47,30 @@ defaults: shell: bash -el {0} jobs: + dispatch: + name: vLLM Ascend test (dispatch) + runs-on: ascend-ci-arm64 + outputs: + number: ${{ steps.dispatch-device.outputs.number }} + steps: + - name: vLLM Ascend test (dispatch) + id: dispatch-device + run: | + # Try to acquire lock to dispatch devices + lockfile /tmp/dispatch.lock + + # Print npu info + npu-list /dev/null 2>&1 + + # Select first available device (Skip reserved davinci1 and davinci0) + NUMBER=$(npu-list /dev/null 2>&1 | grep None | grep -v davinci1 | grep -v davinci0 | head -1 | cut -b 15) + echo "Dispatch to /dev/davinci$NUMBER" + echo "number=$NUMBER" >> $GITHUB_OUTPUT + test: + needs: [dispatch] name: vLLM Ascend test (self-host) - runs-on: ascend-arm64 # actionlint-ignore: runner-label + runs-on: ascend-ci-arm64 # actionlint-ignore: runner-label container: image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10 @@ -58,9 +79,11 @@ jobs: - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ # Use self-host cache speed up pip and model download - - /home/action/actions-runner/_work/cache:/github/home/.cache/ + - /home/action/cache:/github/home/.cache/ + # for dispatch lock + - /tmp/:/tmp/ options: >- - --device /dev/davinci6 + --device /dev/davinci${{ needs.dispatch.outputs.number 
}} --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc @@ -71,6 +94,8 @@ jobs: run: | npu-smi info cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # unlock + rm -rf /tmp/dispatch.lock - name: Config mirrors run: | diff --git a/tests/test_offline_inference.py b/tests/test_offline_inference.py index 484bce6..6ad5c96 100644 --- a/tests/test_offline_inference.py +++ b/tests/test_offline_inference.py @@ -32,6 +32,7 @@ MODELS = [ "Qwen/Qwen2.5-0.5B-Instruct", ] os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256" TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")