[CI] Optimize nightly CI (#3898)

### What this PR does / why we need it?
This patch mainly fixes the problem of not being able to determine the
exit status of the pod's entrypoint script, and also includes some other
minor optimizations:
1. Shorten the timeout used when waiting for the server
2. Fix typo
3. Fix the issue of ais_bench failing to correctly access the proxy URL
in a PD separation scenario.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.11.0
- vLLM main:
83f478bb19

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-30 23:42:20 +08:00
committed by GitHub
parent 2c291bc63f
commit eb0a2ee2d0
14 changed files with 94 additions and 66 deletions

View File

@@ -68,6 +68,7 @@ class AisbenchRunner:
model: str,
port: int,
aisbench_config: dict,
host_ip: str = "localhost",
verify=True):
self.model = model
self.dataset_path = maybe_download_from_modelscope(
@@ -76,6 +77,7 @@ class AisbenchRunner:
assert self.dataset_path is not None and self.model_path is not None, \
f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
self.port = port
self.host_ip = host_ip
self.task_type = aisbench_config["case_type"]
self.request_conf = aisbench_config["request_conf"]
self.dataset_conf = aisbench_config.get("dataset_conf")
@@ -131,6 +133,7 @@ class AisbenchRunner:
content = f.read()
content = re.sub(r'model=.*', f'model="{self.model}",', content)
content = re.sub(r'host_port.*', f'host_port = {self.port},', content)
content = re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content)
content = re.sub(r'max_out_len.*',
f'max_out_len = {self.max_out_len},', content)
content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},',
@@ -238,14 +241,21 @@ class AisbenchRunner:
assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}."
def run_aisbench_cases(model, port, aisbench_cases, server_args=""):
def run_aisbench_cases(model,
port,
aisbench_cases,
server_args="",
host_ip="localhost"):
aisbench_results = []
aisbench_errors = []
for aisbench_case in aisbench_cases:
if not aisbench_case:
continue
try:
with AisbenchRunner(model, port, aisbench_case) as aisbench:
with AisbenchRunner(model=model,
port=port,
host_ip=host_ip,
aisbench_config=aisbench_case) as aisbench:
aisbench_results.append(aisbench.result)
except Exception as e:
aisbench_results.append("")