[CI] Optimize nightly CI (#3858)
### What this PR does / why we need it?
This patch optimizes the nightly CI:
1. Fix the ais_bench error caused by getting a None repo_type
2. Fix the kubectl installation error on A2 with the ARM architecture
3. Fix the multi_node CI being unable to determine whether the job
succeeded
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main:
83f478bb19
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -15,7 +15,7 @@ from tests.e2e.nightly.multi_node.config.utils import (get_avaliable_port,
|
||||
|
||||
setup_logger()
|
||||
logger = logging.getLogger(__name__)
|
||||
DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py"
|
||||
DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py"
|
||||
DISAGGEGATED_PREFILL_PORT = 5333
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ spec:
|
||||
replicas: {{ replicas | default(1) }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ size | default(2) }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
restartPolicy: None
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
@@ -30,6 +30,10 @@ spec:
|
||||
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
|
||||
- name: RESULT_FILE_PATH
|
||||
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }}
|
||||
- name: CONTROLLER_NAME
|
||||
value: {{ controller_name | default("placeholder") }}
|
||||
- name: SECRET
|
||||
value: {{ kb_secret | default("placeholder") }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
||||
@@ -162,14 +162,31 @@ kill_npu_processes() {
|
||||
}
|
||||
|
||||
run_tests() {
|
||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py
|
||||
set +e
|
||||
kill_npu_processes
|
||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py
|
||||
ret=$?
|
||||
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
|
||||
mkdir -p "$(dirname "$RESULT_FILE_PATH")"
|
||||
echo $ret > "$RESULT_FILE_PATH"
|
||||
if [ $ret -eq 0 ]; then
|
||||
print_success "All tests passed!"
|
||||
else
|
||||
print_error "Some tests failed!"
|
||||
kubectl delete pod $CONTROLLER_NAME -n vllm-project
|
||||
fi
|
||||
fi
|
||||
return $ret
|
||||
set -e
|
||||
}
|
||||
|
||||
install_kubectl() {
|
||||
arch=$(uname -m)
|
||||
KUBECTL=/root/.cache/.kube/kubectl
|
||||
if echo "$arch" | grep -qiE "arm|aarch64"; then
|
||||
echo "Detected ARM architecture: $arch"
|
||||
KUBECTL="$KUBECTL"_arm
|
||||
fi
|
||||
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
|
||||
echo "$SECRET" | base64 -d > /tmp/kubeconfig
|
||||
export KUBECONFIG=/tmp/kubeconfig
|
||||
}
|
||||
|
||||
main() {
|
||||
@@ -177,6 +194,7 @@ main() {
|
||||
check_and_config
|
||||
checkout_src
|
||||
install_sys_dependencies
|
||||
install_kubectl
|
||||
install_vllm
|
||||
install_ais_bench
|
||||
# to speed up mooncake build process, install Go here
|
||||
|
||||
Reference in New Issue
Block a user