[CI] Optimize nightly CI (#3898)
### What this PR does / why we need it?
This patch mainly fixes the problem of not being able to determine the
exit status of the pod's entrypoint script, and also includes some other
minor optimizations:
1. Shorten the default wait-for-server timeout (7200 s -> 1800 s)
2. Fix a typo
3. Fix the issue of ais_bench failing to correctly access the proxy URL
in a PD separation (disaggregated prefill) scenario.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
24
.github/workflows/_e2e_nightly_multi_node.yaml
vendored
24
.github/workflows/_e2e_nightly_multi_node.yaml
vendored
@@ -60,13 +60,13 @@ defaults:
|
|||||||
# only cancel in-progress runs of the same workflow
|
# only cancel in-progress runs of the same workflow
|
||||||
# and ignore the lint / 8 cards test type
|
# and ignore the lint / 8 cards test type
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ascend-nightly-${{ github.ref }}-${{ inputs.config_file_path }}
|
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
e2e:
|
e2e:
|
||||||
name: ${{ inputs.config_file_path }}
|
name: ${{ inputs.config_file_path }}
|
||||||
# This is a runner with no NPU for k8s controller
|
# This is the runner with no NPU for k8s controller
|
||||||
runs-on: ${{ inputs.runner }}
|
runs-on: ${{ inputs.runner }}
|
||||||
container:
|
container:
|
||||||
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
||||||
@@ -75,7 +75,7 @@ jobs:
|
|||||||
KUBECTL: /root/.cache/.kube/kubectl
|
KUBECTL: /root/.cache/.kube/kubectl
|
||||||
NAMESPACE: vllm-project
|
NAMESPACE: vllm-project
|
||||||
LEADER_POD: vllm-0
|
LEADER_POD: vllm-0
|
||||||
RESULT_FILE: /root/.cache/tests/ret/test_result.txt
|
RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
|
||||||
steps:
|
steps:
|
||||||
- name: Install system denpendencies
|
- name: Install system denpendencies
|
||||||
run: |
|
run: |
|
||||||
@@ -84,7 +84,7 @@ jobs:
|
|||||||
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
||||||
pip install jinja2-cli
|
pip install jinja2-cli
|
||||||
|
|
||||||
apt-get update -y && apt-get install -y git curl
|
#apt-get update -y && apt-get install -y git curl
|
||||||
|
|
||||||
- name: Install kubectl
|
- name: Install kubectl
|
||||||
run: |
|
run: |
|
||||||
@@ -117,8 +117,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
# pre clear the crd resources created by lws
|
# pre clear the crd resources created by lws
|
||||||
kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
|
kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
|
||||||
|
|
||||||
- name: Launch cluster
|
- name: Launch cluster
|
||||||
|
id: launcher
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
@@ -130,6 +130,8 @@ jobs:
|
|||||||
vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
|
vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
|
||||||
vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
|
vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
|
||||||
result_file_path="$RESULT_FILE"
|
result_file_path="$RESULT_FILE"
|
||||||
|
fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}"
|
||||||
|
echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV
|
||||||
|
|
||||||
required_params=("size" "replicas" "image" "config_file_path")
|
required_params=("size" "replicas" "image" "config_file_path")
|
||||||
for param in "${required_params[@]}"; do
|
for param in "${required_params[@]}"; do
|
||||||
@@ -155,8 +157,7 @@ jobs:
|
|||||||
-D vllm_ascend_ref="$vllm_ascend_ref" \
|
-D vllm_ascend_ref="$vllm_ascend_ref" \
|
||||||
-D result_file_path="$result_file_path" \
|
-D result_file_path="$result_file_path" \
|
||||||
-D npu_per_node="$npu_per_node" \
|
-D npu_per_node="$npu_per_node" \
|
||||||
-D controller_name="$HOSTNAME" \
|
-D fail_tag="$fail_tag" \
|
||||||
-D kb_secret=${{ secrets.KUBECONFIG_B64 }} \
|
|
||||||
--outfile lws.yaml
|
--outfile lws.yaml
|
||||||
|
|
||||||
kubectl apply -f ./lws.yaml
|
kubectl apply -f ./lws.yaml
|
||||||
@@ -180,7 +181,14 @@ jobs:
|
|||||||
|
|
||||||
- name: Stream logs
|
- name: Stream logs
|
||||||
run: |
|
run: |
|
||||||
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE"
|
set -euo pipefail
|
||||||
|
echo "Looking for logs containing: $FAIL_TAG"
|
||||||
|
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while read -r line; do
|
||||||
|
echo "$line"
|
||||||
|
if echo "$line" | grep -q "$FAIL_TAG"; then
|
||||||
|
exit 1 # workflow step failed
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
- name: Post process
|
- name: Post process
|
||||||
if: always()
|
if: always()
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ defaults:
|
|||||||
# only cancel in-progress runs of the same workflow
|
# only cancel in-progress runs of the same workflow
|
||||||
# and ignore the lint / 1 card / 4 cards test type
|
# and ignore the lint / 1 card / 4 cards test type
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ascend-nightly-${{ github.ref }}-${{ inputs.tests }}
|
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ concurrency:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
single-node-tests:
|
single-node-tests:
|
||||||
|
name: single-node
|
||||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@@ -63,6 +64,7 @@ jobs:
|
|||||||
tests: ${{ matrix.test_config.tests }}
|
tests: ${{ matrix.test_config.tests }}
|
||||||
|
|
||||||
multi-node-tests:
|
multi-node-tests:
|
||||||
|
name: multi-node
|
||||||
needs: single-node-tests
|
needs: single-node-tests
|
||||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||||
strategy:
|
strategy:
|
||||||
@@ -71,10 +73,10 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
test_config:
|
test_config:
|
||||||
- name: multi-node-deepseek-dp
|
- name: multi-node-deepseek-dp
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
|
config_file_path: DeepSeek-R1-W8A8-A2.yaml
|
||||||
size: 2
|
size: 2
|
||||||
- name: multi-node-deepseek-dp-torchair
|
- name: multi-node-deepseek-dp-torchair
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml
|
config_file_path: DeepSeek-R1-W8A8-A2-torchair.yaml
|
||||||
size: 2
|
size: 2
|
||||||
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ concurrency:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
single-node-tests:
|
single-node-tests:
|
||||||
|
name: single-node
|
||||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@@ -85,6 +86,7 @@ jobs:
|
|||||||
tests: ${{ matrix.test_config.tests }}
|
tests: ${{ matrix.test_config.tests }}
|
||||||
|
|
||||||
multi-node-tests:
|
multi-node-tests:
|
||||||
|
name: multi-node
|
||||||
needs: single-node-tests
|
needs: single-node-tests
|
||||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||||
strategy:
|
strategy:
|
||||||
@@ -93,19 +95,19 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
test_config:
|
test_config:
|
||||||
- name: multi-node-deepseek-pd
|
- name: multi-node-deepseek-pd
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml
|
config_file_path: DeepSeek-V3.yaml
|
||||||
size: 2
|
size: 2
|
||||||
- name: multi-node-qwen3-dp
|
- name: multi-node-qwen3-dp
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml
|
config_file_path: Qwen3-235B-A3B.yaml
|
||||||
size: 2
|
size: 2
|
||||||
- name: multi-node-dpsk-4node-pd
|
- name: multi-node-dpsk-4node-pd
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml
|
config_file_path: DeepSeek-R1-W8A8.yaml
|
||||||
size: 4
|
size: 4
|
||||||
- name: multi-node-qwenw8a8-2node
|
- name: multi-node-qwenw8a8-2node
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml
|
config_file_path: Qwen3-235B-W8A8.yaml
|
||||||
size: 2
|
size: 2
|
||||||
- name: multi-node-glm-2node
|
- name: multi-node-glm-2node
|
||||||
config_file_path: tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml
|
config_file_path: GLM-4_5.yaml
|
||||||
size: 2
|
size: 2
|
||||||
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
||||||
with:
|
with:
|
||||||
@@ -117,12 +119,3 @@ jobs:
|
|||||||
config_file_path: ${{ matrix.test_config.config_file_path }}
|
config_file_path: ${{ matrix.test_config.config_file_path }}
|
||||||
secrets:
|
secrets:
|
||||||
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
|
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
|
||||||
|
|
||||||
clear_resources:
|
|
||||||
needs: multi-node-tests
|
|
||||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
|
||||||
uses: ./.github/workflows/_kill_lws_resources.yaml
|
|
||||||
with:
|
|
||||||
runner: linux-aarch64-a3-0
|
|
||||||
secrets:
|
|
||||||
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
|
|
||||||
|
|||||||
@@ -163,10 +163,11 @@ class RemoteOpenAIServer:
|
|||||||
self.proxy_port = proxy_port
|
self.proxy_port = proxy_port
|
||||||
|
|
||||||
self._start_server(model, vllm_serve_args, env_dict)
|
self._start_server(model, vllm_serve_args, env_dict)
|
||||||
max_wait_seconds = max_wait_seconds or 7200
|
max_wait_seconds = max_wait_seconds or 1800
|
||||||
if self.disaggregated_prefill:
|
if self.disaggregated_prefill:
|
||||||
assert proxy_port is not None, "for disaggregated_prefill, proxy port must be provided"
|
assert proxy_port is not None, "for disaggregated_prefill, proxy port must be provided"
|
||||||
self._wait_for_server_pd(proxy_port=proxy_port)
|
self._wait_for_server_pd(proxy_port=proxy_port,
|
||||||
|
timeout=max_wait_seconds)
|
||||||
else:
|
else:
|
||||||
self._wait_for_server(url=self.url_for("health"),
|
self._wait_for_server(url=self.url_for("health"),
|
||||||
timeout=max_wait_seconds)
|
timeout=max_wait_seconds)
|
||||||
@@ -186,7 +187,7 @@ class RemoteOpenAIServer:
|
|||||||
"""Subclasses override this method to customize process polling"""
|
"""Subclasses override this method to customize process polling"""
|
||||||
return self.proc.poll()
|
return self.proc.poll()
|
||||||
|
|
||||||
def hang_until_terminated(self) -> None:
|
def hang_until_terminated(self, url) -> None:
|
||||||
"""
|
"""
|
||||||
Wait until the server process terminates.
|
Wait until the server process terminates.
|
||||||
This is for headless mode, where the api server
|
This is for headless mode, where the api server
|
||||||
@@ -196,7 +197,7 @@ class RemoteOpenAIServer:
|
|||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
resp = client.get(self.url_for("health"), timeout=5)
|
resp = client.get(url, timeout=5)
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
break
|
break
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
@@ -206,7 +207,7 @@ class RemoteOpenAIServer:
|
|||||||
if isinstance(client, httpx.Client):
|
if isinstance(client, httpx.Client):
|
||||||
client.close()
|
client.close()
|
||||||
|
|
||||||
def _wait_for_server_pd(self, proxy_port: int):
|
def _wait_for_server_pd(self, proxy_port: int, timeout: float):
|
||||||
# Wait for all api_server nodes ready
|
# Wait for all api_server nodes ready
|
||||||
assert self.nodes_info is not None, "cluster info must be provided"
|
assert self.nodes_info is not None, "cluster info must be provided"
|
||||||
for node_info in self.nodes_info:
|
for node_info in self.nodes_info:
|
||||||
@@ -214,12 +215,12 @@ class RemoteOpenAIServer:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
url_health = f"http://{node_info.ip}:{node_info.server_port}/health"
|
url_health = f"http://{node_info.ip}:{node_info.server_port}/health"
|
||||||
self._wait_for_server(url=url_health, timeout=7200)
|
self._wait_for_server(url=url_health, timeout=timeout)
|
||||||
|
|
||||||
# Wait for proxy ready
|
# Wait for proxy ready
|
||||||
master_node = self.nodes_info[0]
|
master_node = self.nodes_info[0]
|
||||||
url_proxy = f"http://{master_node.ip}:{proxy_port}/healthcheck"
|
url_proxy = f"http://{master_node.ip}:{proxy_port}/healthcheck"
|
||||||
self._wait_for_server(url=url_proxy, timeout=7200)
|
self._wait_for_server(url=url_proxy, timeout=timeout)
|
||||||
|
|
||||||
def _wait_for_server(self, *, url: str, timeout: float):
|
def _wait_for_server(self, *, url: str, timeout: float):
|
||||||
# run health check
|
# run health check
|
||||||
|
|||||||
@@ -97,3 +97,12 @@ deployment:
|
|||||||
}
|
}
|
||||||
}'
|
}'
|
||||||
benchmarks:
|
benchmarks:
|
||||||
|
acc:
|
||||||
|
case_type: accuracy
|
||||||
|
dataset_path: vllm-ascend/gsm8k-lite
|
||||||
|
request_conf: vllm_api_general_chat
|
||||||
|
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
|
||||||
|
max_out_len: 4096
|
||||||
|
batch_size: 512
|
||||||
|
baseline: 95
|
||||||
|
threshold: 5
|
||||||
|
|||||||
@@ -47,3 +47,4 @@ deployment:
|
|||||||
--no-enable-prefix-caching
|
--no-enable-prefix-caching
|
||||||
--gpu-memory-utilization 0.9
|
--gpu-memory-utilization 0.9
|
||||||
benchmarks:
|
benchmarks:
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ setup_logger()
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py"
|
DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py"
|
||||||
DISAGGEGATED_PREFILL_PORT = 5333
|
DISAGGEGATED_PREFILL_PORT = 5333
|
||||||
|
CONFIG_BASE_PATH = "tests/e2e/nightly/multi_node/config/models/"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -187,9 +188,8 @@ class MultiNodeConfig:
|
|||||||
@classmethod
|
@classmethod
|
||||||
def from_yaml(cls, yaml_path: Optional[str] = None):
|
def from_yaml(cls, yaml_path: Optional[str] = None):
|
||||||
if not yaml_path:
|
if not yaml_path:
|
||||||
yaml_path = os.getenv(
|
yaml_path = os.getenv("CONFIG_YAML_PATH", "DeepSeek-V3.yaml")
|
||||||
"CONFIG_YAML_PATH",
|
yaml_path = os.path.join(CONFIG_BASE_PATH, yaml_path)
|
||||||
"tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml")
|
|
||||||
with open(yaml_path, 'r') as file:
|
with open(yaml_path, 'r') as file:
|
||||||
config_data = yaml.safe_load(file)
|
config_data = yaml.safe_load(file)
|
||||||
test_name = config_data.get("test_name", "default_test")
|
test_name = config_data.get("test_name", "default_test")
|
||||||
@@ -255,6 +255,7 @@ class MultiNodeConfig:
|
|||||||
ranktable_path = self.disaggregated_prefill.get("ranktable_path")
|
ranktable_path = self.disaggregated_prefill.get("ranktable_path")
|
||||||
assert ranktable_gen_path is not None and ranktable_path is not None
|
assert ranktable_gen_path is not None and ranktable_path is not None
|
||||||
if os.path.exists(str(ranktable_path)):
|
if os.path.exists(str(ranktable_path)):
|
||||||
|
logger.info("ranktable has already generated")
|
||||||
return
|
return
|
||||||
|
|
||||||
local_host = self.cur_ip
|
local_host = self.cur_ip
|
||||||
@@ -286,6 +287,8 @@ class MultiNodeConfig:
|
|||||||
assert self.nic_name is not None
|
assert self.nic_name is not None
|
||||||
env["GLOO_SOCKET_IFNAME"] = self.nic_name
|
env["GLOO_SOCKET_IFNAME"] = self.nic_name
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Generating ranktable from command: {' '.join(map(str, cmd))}")
|
||||||
subprocess.run(cmd, env=env, check=True)
|
subprocess.run(cmd, env=env, check=True)
|
||||||
assert os.path.exists(
|
assert os.path.exists(
|
||||||
str(ranktable_path)), "failed generate ranktable.json"
|
str(ranktable_path)), "failed generate ranktable.json"
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ spec:
|
|||||||
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
|
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
|
||||||
env:
|
env:
|
||||||
- name: CONFIG_YAML_PATH
|
- name: CONFIG_YAML_PATH
|
||||||
value: {{ config_file_path | default("tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") }}
|
value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
|
||||||
- name: WORKSPACE
|
- name: WORKSPACE
|
||||||
value: "/root/workspace"
|
value: "/root/workspace"
|
||||||
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
|
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
|
||||||
@@ -29,11 +29,9 @@ spec:
|
|||||||
- name: VLLM_ASCEND_REMOTE_URL
|
- name: VLLM_ASCEND_REMOTE_URL
|
||||||
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
|
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
|
||||||
- name: RESULT_FILE_PATH
|
- name: RESULT_FILE_PATH
|
||||||
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }}
|
value: {{ result_file_path | default("/root/.cache/tests/ret") }}
|
||||||
- name: CONTROLLER_NAME
|
- name: FAIL_TAG
|
||||||
value: {{ controller_name | default("placeholder") }}
|
value: {{ fail_tag | default("FAIL_TAG") }}
|
||||||
- name: SECRET
|
|
||||||
value: {{ kb_secret | default("placeholder") }}
|
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
@@ -80,7 +78,7 @@ spec:
|
|||||||
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
|
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
|
||||||
env:
|
env:
|
||||||
- name: CONFIG_YAML_PATH
|
- name: CONFIG_YAML_PATH
|
||||||
value: {{ config_file_path | default("tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") }}
|
value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
|
||||||
- name: WORKSPACE
|
- name: WORKSPACE
|
||||||
value: "/root/workspace"
|
value: "/root/workspace"
|
||||||
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
|
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
|
||||||
@@ -92,6 +90,8 @@ spec:
|
|||||||
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
|
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
|
||||||
- name: RESULT_FILE_PATH
|
- name: RESULT_FILE_PATH
|
||||||
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }}
|
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }}
|
||||||
|
- name: FAIL_TAG
|
||||||
|
value: {{ fail_tag | default("FAIL_TAG") }}
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
|
|||||||
@@ -20,6 +20,11 @@ print_section() {
|
|||||||
echo -e "\n${BLUE}=== $1 ===${NC}"
|
echo -e "\n${BLUE}=== $1 ===${NC}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
print_failure() {
|
||||||
|
echo -e "${RED}${FAIL_TAG} ✗ ERROR: $1${NC}"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
# Function to print success messages
|
# Function to print success messages
|
||||||
print_success() {
|
print_success() {
|
||||||
echo -e "${GREEN}✓ $1${NC}"
|
echo -e "${GREEN}✓ $1${NC}"
|
||||||
@@ -161,32 +166,24 @@ kill_npu_processes() {
|
|||||||
sleep 4
|
sleep 4
|
||||||
}
|
}
|
||||||
|
|
||||||
run_tests() {
|
run_tests_with_log() {
|
||||||
set +e
|
set +e
|
||||||
kill_npu_processes
|
kill_npu_processes
|
||||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py
|
BASENAME=$(basename "$CONFIG_YAML_PATH" .yaml)
|
||||||
ret=$?
|
# each worker should have log file
|
||||||
|
LOG_FILE="${RESULT_FILE_PATH}/${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
|
||||||
|
mkdir -p ${RESULT_FILE_PATH}
|
||||||
|
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py 2>&1 | tee $LOG_FILE
|
||||||
|
ret=${PIPESTATUS[0]}
|
||||||
|
set -e
|
||||||
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
|
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
|
||||||
if [ $ret -eq 0 ]; then
|
if [ $ret -eq 0 ]; then
|
||||||
print_success "All tests passed!"
|
print_success "All tests passed!"
|
||||||
else
|
else
|
||||||
print_error "Some tests failed!"
|
print_failure "Some tests failed!"
|
||||||
kubectl delete pod $CONTROLLER_NAME -n vllm-project
|
mv LOG_FILE error_${LOG_FILE}
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
set -e
|
|
||||||
}
|
|
||||||
|
|
||||||
install_kubectl() {
|
|
||||||
arch=$(uname -m)
|
|
||||||
KUBECTL=/root/.cache/.kube/kubectl
|
|
||||||
if echo "$arch" | grep -qiE "arm|aarch64"; then
|
|
||||||
echo "Detected ARM architecture: $arch"
|
|
||||||
KUBECTL="$KUBECTL"_arm
|
|
||||||
fi
|
|
||||||
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
|
|
||||||
echo "$SECRET" | base64 -d > /tmp/kubeconfig
|
|
||||||
export KUBECONFIG=/tmp/kubeconfig
|
|
||||||
}
|
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
@@ -194,7 +191,6 @@ main() {
|
|||||||
check_and_config
|
check_and_config
|
||||||
checkout_src
|
checkout_src
|
||||||
install_sys_dependencies
|
install_sys_dependencies
|
||||||
install_kubectl
|
|
||||||
install_vllm
|
install_vllm
|
||||||
install_ais_bench
|
install_ais_bench
|
||||||
# to speed up mooncake build process, install Go here
|
# to speed up mooncake build process, install Go here
|
||||||
@@ -203,7 +199,7 @@ main() {
|
|||||||
. $SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh \
|
. $SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh \
|
||||||
pooling_async_memecpy_v1 9d96b2e1dd76cc601d76b1b4c5f6e04605cd81d3
|
pooling_async_memecpy_v1 9d96b2e1dd76cc601d76b1b4c5f6e04605cd81d3
|
||||||
cd "$WORKSPACE/source_code/vllm-ascend"
|
cd "$WORKSPACE/source_code/vllm-ascend"
|
||||||
run_tests
|
run_tests_with_log
|
||||||
}
|
}
|
||||||
|
|
||||||
main "$@"
|
main "$@"
|
||||||
|
|||||||
@@ -118,6 +118,11 @@ async def test_multi_node() -> None:
|
|||||||
port = proxy_port if disaggregated_prefill else server_port
|
port = proxy_port if disaggregated_prefill else server_port
|
||||||
# aisbench test
|
# aisbench test
|
||||||
aisbench_cases = [acc_cmd, perf_cmd]
|
aisbench_cases = [acc_cmd, perf_cmd]
|
||||||
run_aisbench_cases(local_model_path, port, aisbench_cases)
|
run_aisbench_cases(local_model_path,
|
||||||
|
port,
|
||||||
|
aisbench_cases,
|
||||||
|
host_ip=config.cluster_ips[0])
|
||||||
else:
|
else:
|
||||||
remote_server.hang_until_terminated()
|
# for the nodes except master, should hang until the task complete
|
||||||
|
master_url = f"http://{config.cluster_ips[0]}:{server_port}/health"
|
||||||
|
remote_server.hang_until_terminated(master_url)
|
||||||
|
|||||||
@@ -68,6 +68,7 @@ class AisbenchRunner:
|
|||||||
model: str,
|
model: str,
|
||||||
port: int,
|
port: int,
|
||||||
aisbench_config: dict,
|
aisbench_config: dict,
|
||||||
|
host_ip: str = "localhost",
|
||||||
verify=True):
|
verify=True):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.dataset_path = maybe_download_from_modelscope(
|
self.dataset_path = maybe_download_from_modelscope(
|
||||||
@@ -76,6 +77,7 @@ class AisbenchRunner:
|
|||||||
assert self.dataset_path is not None and self.model_path is not None, \
|
assert self.dataset_path is not None and self.model_path is not None, \
|
||||||
f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
|
f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
|
||||||
self.port = port
|
self.port = port
|
||||||
|
self.host_ip = host_ip
|
||||||
self.task_type = aisbench_config["case_type"]
|
self.task_type = aisbench_config["case_type"]
|
||||||
self.request_conf = aisbench_config["request_conf"]
|
self.request_conf = aisbench_config["request_conf"]
|
||||||
self.dataset_conf = aisbench_config.get("dataset_conf")
|
self.dataset_conf = aisbench_config.get("dataset_conf")
|
||||||
@@ -131,6 +133,7 @@ class AisbenchRunner:
|
|||||||
content = f.read()
|
content = f.read()
|
||||||
content = re.sub(r'model=.*', f'model="{self.model}",', content)
|
content = re.sub(r'model=.*', f'model="{self.model}",', content)
|
||||||
content = re.sub(r'host_port.*', f'host_port = {self.port},', content)
|
content = re.sub(r'host_port.*', f'host_port = {self.port},', content)
|
||||||
|
content = re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content)
|
||||||
content = re.sub(r'max_out_len.*',
|
content = re.sub(r'max_out_len.*',
|
||||||
f'max_out_len = {self.max_out_len},', content)
|
f'max_out_len = {self.max_out_len},', content)
|
||||||
content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},',
|
content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},',
|
||||||
@@ -238,14 +241,21 @@ class AisbenchRunner:
|
|||||||
assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}."
|
assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}."
|
||||||
|
|
||||||
|
|
||||||
def run_aisbench_cases(model, port, aisbench_cases, server_args=""):
|
def run_aisbench_cases(model,
|
||||||
|
port,
|
||||||
|
aisbench_cases,
|
||||||
|
server_args="",
|
||||||
|
host_ip="localhost"):
|
||||||
aisbench_results = []
|
aisbench_results = []
|
||||||
aisbench_errors = []
|
aisbench_errors = []
|
||||||
for aisbench_case in aisbench_cases:
|
for aisbench_case in aisbench_cases:
|
||||||
if not aisbench_case:
|
if not aisbench_case:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
with AisbenchRunner(model, port, aisbench_case) as aisbench:
|
with AisbenchRunner(model=model,
|
||||||
|
port=port,
|
||||||
|
host_ip=host_ip,
|
||||||
|
aisbench_config=aisbench_case) as aisbench:
|
||||||
aisbench_results.append(aisbench.result)
|
aisbench_results.append(aisbench.result)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
aisbench_results.append("")
|
aisbench_results.append("")
|
||||||
|
|||||||
Reference in New Issue
Block a user