### What this PR does / why we need it?
This patch optimizes the nightly CI:
1. Fix the ais_bench error caused by a `None` repo_type
2. Fix the kubectl installation error on A2 with the ARM architecture
3. Fix the multi_node CI being unable to determine whether the job
succeeded
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main:
83f478bb19
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
58 lines
1.6 KiB
YAML
58 lines
1.6 KiB
YAML
name: resource clear

# Reusable workflow: it is only started through `workflow_call`.
on:
  workflow_call:
    inputs:
      # Label of the runner the job is scheduled on.
      runner:
        type: string
        required: false
        default: 'linux-aarch64-a3-0'
    secrets:
      # Base64-encoded kubeconfig; decoded to $KUBECONFIG by the job.
      KUBECONFIG_B64:
        required: true

# Run every step in a login shell ("shell: bash -el {0}"): plain bash shells
# do not read ~/.profile or ~/.bashrc, so this must be declared explicitly.
# It is what activates the ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}
jobs:
  # Installs kubectl, restores the kubeconfig from the KUBECONFIG_B64 secret
  # and deletes the "vllm" LeaderWorkerSet from $NAMESPACE.
  resource_clear:
    # This is a runner with no NPU for k8s controller
    runs-on: ${{ inputs.runner }}
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      # NOTE(review): nesting of `env` under `container` is reconstructed from
      # key order (original indentation was lost) - confirm against the repo.
      env:
        KUBECONFIG: /tmp/kubeconfig
        KUBECTL: /root/.cache/.kube/kubectl
        NAMESPACE: vllm-project
        LEADER_POD: vllm-0
        RESULT_FILE: /root/.cache/tests/ret/test_result.txt
    steps:
      - name: Install kubectl
        run: |
          # Install kubectl from the binary at $KUBECTL; on ARM hosts the
          # path with the "_arm" suffix is used instead.
          arch=$(uname -m)

          if echo "$arch" | grep -qiE "arm|aarch64"; then
            echo "Detected ARM architecture: $arch"
            KUBECTL="${KUBECTL}_arm"
          fi
          # Quoted so the path is never word-split or glob-expanded.
          install -o root -g root -m 0755 "$KUBECTL" /usr/local/bin/kubectl

          # Verify kubectl installation
          kubectl version --client=true

      - name: Decode kubeconfig from secrets
        env:
          # Hand the secret over as an environment variable instead of
          # expanding it into the script text.
          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
        run: |
          # Decode and save kubeconfig, readable by the owner only
          umask 077
          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$KUBECONFIG"
          chmod 600 "$KUBECONFIG"

      - name: Clear LWS resources
        # Attempt the cleanup even when an earlier step failed.
        if: always()
        run: |
          kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found