[CI] Optimize nightly CI (#3898)

### What this PR does / why we need it?
This patch mainly fixes the problem of not being able to determine the
exit status of the pod's entrypoint script, along with some other small
optimizations:
1. Shorten the wait-for-server timeout
2. Fix typos
3. Fix the issue of ais_bench failing to correctly access the proxy URL
in a PD-separation (disaggregated prefill) scenario.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.11.0
- vLLM main:
83f478bb19

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-30 23:42:20 +08:00
committed by GitHub
parent 2c291bc63f
commit eb0a2ee2d0
14 changed files with 94 additions and 66 deletions

View File

@@ -60,13 +60,13 @@ defaults:
# only cancel in-progress runs of the same workflow # only cancel in-progress runs of the same workflow
# and ignore the lint / 8 cards test type # and ignore the lint / 8 cards test type
concurrency: concurrency:
group: ascend-nightly-${{ github.ref }}-${{ inputs.config_file_path }} group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path }}
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
e2e: e2e:
name: ${{ inputs.config_file_path }} name: ${{ inputs.config_file_path }}
# This is a runner with no NPU for k8s controller # This is the runner with no NPU for k8s controller
runs-on: ${{ inputs.runner }} runs-on: ${{ inputs.runner }}
container: container:
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11 image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
@@ -75,7 +75,7 @@ jobs:
KUBECTL: /root/.cache/.kube/kubectl KUBECTL: /root/.cache/.kube/kubectl
NAMESPACE: vllm-project NAMESPACE: vllm-project
LEADER_POD: vllm-0 LEADER_POD: vllm-0
RESULT_FILE: /root/.cache/tests/ret/test_result.txt RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
steps: steps:
- name: Install system denpendencies - name: Install system denpendencies
run: | run: |
@@ -84,7 +84,7 @@ jobs:
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
pip install jinja2-cli pip install jinja2-cli
apt-get update -y && apt-get install -y git curl #apt-get update -y && apt-get install -y git curl
- name: Install kubectl - name: Install kubectl
run: | run: |
@@ -117,8 +117,8 @@ jobs:
run: | run: |
# pre clear the crd resources created by lws # pre clear the crd resources created by lws
kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
- name: Launch cluster - name: Launch cluster
id: launcher
run: | run: |
set -e set -e
@@ -130,6 +130,8 @@ jobs:
vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}" vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}" vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
result_file_path="$RESULT_FILE" result_file_path="$RESULT_FILE"
fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}"
echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV
required_params=("size" "replicas" "image" "config_file_path") required_params=("size" "replicas" "image" "config_file_path")
for param in "${required_params[@]}"; do for param in "${required_params[@]}"; do
@@ -155,8 +157,7 @@ jobs:
-D vllm_ascend_ref="$vllm_ascend_ref" \ -D vllm_ascend_ref="$vllm_ascend_ref" \
-D result_file_path="$result_file_path" \ -D result_file_path="$result_file_path" \
-D npu_per_node="$npu_per_node" \ -D npu_per_node="$npu_per_node" \
-D controller_name="$HOSTNAME" \ -D fail_tag="$fail_tag" \
-D kb_secret=${{ secrets.KUBECONFIG_B64 }} \
--outfile lws.yaml --outfile lws.yaml
kubectl apply -f ./lws.yaml kubectl apply -f ./lws.yaml
@@ -180,7 +181,14 @@ jobs:
- name: Stream logs - name: Stream logs
run: | run: |
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" set -euo pipefail
echo "Looking for logs containing: $FAIL_TAG"
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while read -r line; do
echo "$line"
if echo "$line" | grep -q "$FAIL_TAG"; then
exit 1 # workflow step failed
fi
done
- name: Post process - name: Post process
if: always() if: always()

View File

@@ -44,7 +44,7 @@ defaults:
# only cancel in-progress runs of the same workflow # only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type # and ignore the lint / 1 card / 4 cards test type
concurrency: concurrency:
group: ascend-nightly-${{ github.ref }}-${{ inputs.tests }} group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:

View File

@@ -42,6 +42,7 @@ concurrency:
jobs: jobs:
single-node-tests: single-node-tests:
name: single-node
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
strategy: strategy:
fail-fast: false fail-fast: false
@@ -63,6 +64,7 @@ jobs:
tests: ${{ matrix.test_config.tests }} tests: ${{ matrix.test_config.tests }}
multi-node-tests: multi-node-tests:
name: multi-node
needs: single-node-tests needs: single-node-tests
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
strategy: strategy:
@@ -71,10 +73,10 @@ jobs:
matrix: matrix:
test_config: test_config:
- name: multi-node-deepseek-dp - name: multi-node-deepseek-dp
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml config_file_path: DeepSeek-R1-W8A8-A2.yaml
size: 2 size: 2
- name: multi-node-deepseek-dp-torchair - name: multi-node-deepseek-dp-torchair
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml config_file_path: DeepSeek-R1-W8A8-A2-torchair.yaml
size: 2 size: 2
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
with: with:

View File

@@ -42,6 +42,7 @@ concurrency:
jobs: jobs:
single-node-tests: single-node-tests:
name: single-node
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
strategy: strategy:
fail-fast: false fail-fast: false
@@ -85,6 +86,7 @@ jobs:
tests: ${{ matrix.test_config.tests }} tests: ${{ matrix.test_config.tests }}
multi-node-tests: multi-node-tests:
name: multi-node
needs: single-node-tests needs: single-node-tests
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
strategy: strategy:
@@ -93,19 +95,19 @@ jobs:
matrix: matrix:
test_config: test_config:
- name: multi-node-deepseek-pd - name: multi-node-deepseek-pd
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml config_file_path: DeepSeek-V3.yaml
size: 2 size: 2
- name: multi-node-qwen3-dp - name: multi-node-qwen3-dp
config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml config_file_path: Qwen3-235B-A3B.yaml
size: 2 size: 2
- name: multi-node-dpsk-4node-pd - name: multi-node-dpsk-4node-pd
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml config_file_path: DeepSeek-R1-W8A8.yaml
size: 4 size: 4
- name: multi-node-qwenw8a8-2node - name: multi-node-qwenw8a8-2node
config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml config_file_path: Qwen3-235B-W8A8.yaml
size: 2 size: 2
- name: multi-node-glm-2node - name: multi-node-glm-2node
config_file_path: tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml config_file_path: GLM-4_5.yaml
size: 2 size: 2
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
with: with:
@@ -117,12 +119,3 @@ jobs:
config_file_path: ${{ matrix.test_config.config_file_path }} config_file_path: ${{ matrix.test_config.config_file_path }}
secrets: secrets:
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }} KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
clear_resources:
needs: multi-node-tests
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
uses: ./.github/workflows/_kill_lws_resources.yaml
with:
runner: linux-aarch64-a3-0
secrets:
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}

View File

@@ -163,10 +163,11 @@ class RemoteOpenAIServer:
self.proxy_port = proxy_port self.proxy_port = proxy_port
self._start_server(model, vllm_serve_args, env_dict) self._start_server(model, vllm_serve_args, env_dict)
max_wait_seconds = max_wait_seconds or 7200 max_wait_seconds = max_wait_seconds or 1800
if self.disaggregated_prefill: if self.disaggregated_prefill:
assert proxy_port is not None, "for disaggregated_prefill, proxy port must be provided" assert proxy_port is not None, "for disaggregated_prefill, proxy port must be provided"
self._wait_for_server_pd(proxy_port=proxy_port) self._wait_for_server_pd(proxy_port=proxy_port,
timeout=max_wait_seconds)
else: else:
self._wait_for_server(url=self.url_for("health"), self._wait_for_server(url=self.url_for("health"),
timeout=max_wait_seconds) timeout=max_wait_seconds)
@@ -186,7 +187,7 @@ class RemoteOpenAIServer:
"""Subclasses override this method to customize process polling""" """Subclasses override this method to customize process polling"""
return self.proc.poll() return self.proc.poll()
def hang_until_terminated(self) -> None: def hang_until_terminated(self, url) -> None:
""" """
Wait until the server process terminates. Wait until the server process terminates.
This is for headless mode, where the api server This is for headless mode, where the api server
@@ -196,7 +197,7 @@ class RemoteOpenAIServer:
try: try:
while True: while True:
try: try:
resp = client.get(self.url_for("health"), timeout=5) resp = client.get(url, timeout=5)
if resp.status_code != 200: if resp.status_code != 200:
break break
time.sleep(5) time.sleep(5)
@@ -206,7 +207,7 @@ class RemoteOpenAIServer:
if isinstance(client, httpx.Client): if isinstance(client, httpx.Client):
client.close() client.close()
def _wait_for_server_pd(self, proxy_port: int): def _wait_for_server_pd(self, proxy_port: int, timeout: float):
# Wait for all api_server nodes ready # Wait for all api_server nodes ready
assert self.nodes_info is not None, "cluster info must be provided" assert self.nodes_info is not None, "cluster info must be provided"
for node_info in self.nodes_info: for node_info in self.nodes_info:
@@ -214,12 +215,12 @@ class RemoteOpenAIServer:
continue continue
url_health = f"http://{node_info.ip}:{node_info.server_port}/health" url_health = f"http://{node_info.ip}:{node_info.server_port}/health"
self._wait_for_server(url=url_health, timeout=7200) self._wait_for_server(url=url_health, timeout=timeout)
# Wait for proxy ready # Wait for proxy ready
master_node = self.nodes_info[0] master_node = self.nodes_info[0]
url_proxy = f"http://{master_node.ip}:{proxy_port}/healthcheck" url_proxy = f"http://{master_node.ip}:{proxy_port}/healthcheck"
self._wait_for_server(url=url_proxy, timeout=7200) self._wait_for_server(url=url_proxy, timeout=timeout)
def _wait_for_server(self, *, url: str, timeout: float): def _wait_for_server(self, *, url: str, timeout: float):
# run health check # run health check

View File

@@ -97,3 +97,12 @@ deployment:
} }
}' }'
benchmarks: benchmarks:
acc:
case_type: accuracy
dataset_path: vllm-ascend/gsm8k-lite
request_conf: vllm_api_general_chat
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
max_out_len: 4096
batch_size: 512
baseline: 95
threshold: 5

View File

@@ -47,3 +47,4 @@ deployment:
--no-enable-prefix-caching --no-enable-prefix-caching
--gpu-memory-utilization 0.9 --gpu-memory-utilization 0.9
benchmarks: benchmarks:

View File

@@ -17,6 +17,7 @@ setup_logger()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py" DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py"
DISAGGEGATED_PREFILL_PORT = 5333 DISAGGEGATED_PREFILL_PORT = 5333
CONFIG_BASE_PATH = "tests/e2e/nightly/multi_node/config/models/"
@dataclass @dataclass
@@ -187,9 +188,8 @@ class MultiNodeConfig:
@classmethod @classmethod
def from_yaml(cls, yaml_path: Optional[str] = None): def from_yaml(cls, yaml_path: Optional[str] = None):
if not yaml_path: if not yaml_path:
yaml_path = os.getenv( yaml_path = os.getenv("CONFIG_YAML_PATH", "DeepSeek-V3.yaml")
"CONFIG_YAML_PATH", yaml_path = os.path.join(CONFIG_BASE_PATH, yaml_path)
"tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml")
with open(yaml_path, 'r') as file: with open(yaml_path, 'r') as file:
config_data = yaml.safe_load(file) config_data = yaml.safe_load(file)
test_name = config_data.get("test_name", "default_test") test_name = config_data.get("test_name", "default_test")
@@ -255,6 +255,7 @@ class MultiNodeConfig:
ranktable_path = self.disaggregated_prefill.get("ranktable_path") ranktable_path = self.disaggregated_prefill.get("ranktable_path")
assert ranktable_gen_path is not None and ranktable_path is not None assert ranktable_gen_path is not None and ranktable_path is not None
if os.path.exists(str(ranktable_path)): if os.path.exists(str(ranktable_path)):
logger.info("ranktable has already generated")
return return
local_host = self.cur_ip local_host = self.cur_ip
@@ -286,6 +287,8 @@ class MultiNodeConfig:
assert self.nic_name is not None assert self.nic_name is not None
env["GLOO_SOCKET_IFNAME"] = self.nic_name env["GLOO_SOCKET_IFNAME"] = self.nic_name
logger.info(
f"Generating ranktable from command: {' '.join(map(str, cmd))}")
subprocess.run(cmd, env=env, check=True) subprocess.run(cmd, env=env, check=True)
assert os.path.exists( assert os.path.exists(
str(ranktable_path)), "failed generate ranktable.json" str(ranktable_path)), "failed generate ranktable.json"

View File

@@ -18,7 +18,7 @@ spec:
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }} image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
env: env:
- name: CONFIG_YAML_PATH - name: CONFIG_YAML_PATH
value: {{ config_file_path | default("tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") }} value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
- name: WORKSPACE - name: WORKSPACE
value: "/root/workspace" value: "/root/workspace"
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
@@ -29,11 +29,9 @@ spec:
- name: VLLM_ASCEND_REMOTE_URL - name: VLLM_ASCEND_REMOTE_URL
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }} value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
- name: RESULT_FILE_PATH - name: RESULT_FILE_PATH
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }} value: {{ result_file_path | default("/root/.cache/tests/ret") }}
- name: CONTROLLER_NAME - name: FAIL_TAG
value: {{ controller_name | default("placeholder") }} value: {{ fail_tag | default("FAIL_TAG") }}
- name: SECRET
value: {{ kb_secret | default("placeholder") }}
command: command:
- sh - sh
- -c - -c
@@ -80,7 +78,7 @@ spec:
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }} image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
env: env:
- name: CONFIG_YAML_PATH - name: CONFIG_YAML_PATH
value: {{ config_file_path | default("tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") }} value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
- name: WORKSPACE - name: WORKSPACE
value: "/root/workspace" value: "/root/workspace"
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
@@ -92,6 +90,8 @@ spec:
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }} value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
- name: RESULT_FILE_PATH - name: RESULT_FILE_PATH
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }} value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }}
- name: FAIL_TAG
value: {{ fail_tag | default("FAIL_TAG") }}
command: command:
- sh - sh
- -c - -c

View File

@@ -20,6 +20,11 @@ print_section() {
echo -e "\n${BLUE}=== $1 ===${NC}" echo -e "\n${BLUE}=== $1 ===${NC}"
} }
print_failure() {
echo -e "${RED}${FAIL_TAG} ✗ ERROR: $1${NC}"
exit 1
}
# Function to print success messages # Function to print success messages
print_success() { print_success() {
echo -e "${GREEN}$1${NC}" echo -e "${GREEN}$1${NC}"
@@ -161,32 +166,24 @@ kill_npu_processes() {
sleep 4 sleep 4
} }
run_tests() { run_tests_with_log() {
set +e set +e
kill_npu_processes kill_npu_processes
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py BASENAME=$(basename "$CONFIG_YAML_PATH" .yaml)
ret=$? # each worker should have log file
LOG_FILE="${RESULT_FILE_PATH}/${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
mkdir -p ${RESULT_FILE_PATH}
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py 2>&1 | tee $LOG_FILE
ret=${PIPESTATUS[0]}
set -e
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
if [ $ret -eq 0 ]; then if [ $ret -eq 0 ]; then
print_success "All tests passed!" print_success "All tests passed!"
else else
print_error "Some tests failed!" print_failure "Some tests failed!"
kubectl delete pod $CONTROLLER_NAME -n vllm-project mv LOG_FILE error_${LOG_FILE}
fi fi
fi fi
set -e
}
install_kubectl() {
arch=$(uname -m)
KUBECTL=/root/.cache/.kube/kubectl
if echo "$arch" | grep -qiE "arm|aarch64"; then
echo "Detected ARM architecture: $arch"
KUBECTL="$KUBECTL"_arm
fi
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
echo "$SECRET" | base64 -d > /tmp/kubeconfig
export KUBECONFIG=/tmp/kubeconfig
} }
main() { main() {
@@ -194,7 +191,6 @@ main() {
check_and_config check_and_config
checkout_src checkout_src
install_sys_dependencies install_sys_dependencies
install_kubectl
install_vllm install_vllm
install_ais_bench install_ais_bench
# to speed up mooncake build process, install Go here # to speed up mooncake build process, install Go here
@@ -203,7 +199,7 @@ main() {
. $SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh \ . $SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh \
pooling_async_memecpy_v1 9d96b2e1dd76cc601d76b1b4c5f6e04605cd81d3 pooling_async_memecpy_v1 9d96b2e1dd76cc601d76b1b4c5f6e04605cd81d3
cd "$WORKSPACE/source_code/vllm-ascend" cd "$WORKSPACE/source_code/vllm-ascend"
run_tests run_tests_with_log
} }
main "$@" main "$@"

View File

@@ -118,6 +118,11 @@ async def test_multi_node() -> None:
port = proxy_port if disaggregated_prefill else server_port port = proxy_port if disaggregated_prefill else server_port
# aisbench test # aisbench test
aisbench_cases = [acc_cmd, perf_cmd] aisbench_cases = [acc_cmd, perf_cmd]
run_aisbench_cases(local_model_path, port, aisbench_cases) run_aisbench_cases(local_model_path,
port,
aisbench_cases,
host_ip=config.cluster_ips[0])
else: else:
remote_server.hang_until_terminated() # for the nodes except master, should hang until the task complete
master_url = f"http://{config.cluster_ips[0]}:{server_port}/health"
remote_server.hang_until_terminated(master_url)

View File

@@ -68,6 +68,7 @@ class AisbenchRunner:
model: str, model: str,
port: int, port: int,
aisbench_config: dict, aisbench_config: dict,
host_ip: str = "localhost",
verify=True): verify=True):
self.model = model self.model = model
self.dataset_path = maybe_download_from_modelscope( self.dataset_path = maybe_download_from_modelscope(
@@ -76,6 +77,7 @@ class AisbenchRunner:
assert self.dataset_path is not None and self.model_path is not None, \ assert self.dataset_path is not None and self.model_path is not None, \
f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}" f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
self.port = port self.port = port
self.host_ip = host_ip
self.task_type = aisbench_config["case_type"] self.task_type = aisbench_config["case_type"]
self.request_conf = aisbench_config["request_conf"] self.request_conf = aisbench_config["request_conf"]
self.dataset_conf = aisbench_config.get("dataset_conf") self.dataset_conf = aisbench_config.get("dataset_conf")
@@ -131,6 +133,7 @@ class AisbenchRunner:
content = f.read() content = f.read()
content = re.sub(r'model=.*', f'model="{self.model}",', content) content = re.sub(r'model=.*', f'model="{self.model}",', content)
content = re.sub(r'host_port.*', f'host_port = {self.port},', content) content = re.sub(r'host_port.*', f'host_port = {self.port},', content)
content = re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content)
content = re.sub(r'max_out_len.*', content = re.sub(r'max_out_len.*',
f'max_out_len = {self.max_out_len},', content) f'max_out_len = {self.max_out_len},', content)
content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},', content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},',
@@ -238,14 +241,21 @@ class AisbenchRunner:
assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}." assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}."
def run_aisbench_cases(model, port, aisbench_cases, server_args=""): def run_aisbench_cases(model,
port,
aisbench_cases,
server_args="",
host_ip="localhost"):
aisbench_results = [] aisbench_results = []
aisbench_errors = [] aisbench_errors = []
for aisbench_case in aisbench_cases: for aisbench_case in aisbench_cases:
if not aisbench_case: if not aisbench_case:
continue continue
try: try:
with AisbenchRunner(model, port, aisbench_case) as aisbench: with AisbenchRunner(model=model,
port=port,
host_ip=host_ip,
aisbench_config=aisbench_case) as aisbench:
aisbench_results.append(aisbench.result) aisbench_results.append(aisbench.result)
except Exception as e: except Exception as e:
aisbench_results.append("") aisbench_results.append("")