[CI] Fix nightly CI (#3821)

### What this PR does / why we need it?
This patch fix the nightly CI runs
[failure](https://github.com/vllm-project/vllm-ascend/actions/runs/18848144365)

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.11.0rc3
- vLLM main:
https://github.com/vllm-project/vllm/commit/releases/v0.11.1

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-28 20:40:03 +08:00
committed by GitHub
parent a7450db1bd
commit 90ae114569
6 changed files with 79 additions and 24 deletions

View File

@@ -14,11 +14,16 @@
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
import hashlib
import json
import os
import re
import subprocess
import tempfile
from pathlib import Path
import filelock
import huggingface_hub
import pandas as pd
from modelscope import snapshot_download # type: ignore
@@ -63,10 +68,12 @@ class AisbenchRunner:
port: int,
aisbench_config: dict,
verify=True):
self.dataset_path = snapshot_download(aisbench_config["dataset_path"],
repo_type='dataset')
self.model = model
self.model_path = snapshot_download(model)
self.dataset_path = maybe_download_from_modelscope(
aisbench_config["dataset_path"], repo_type="dataset")
self.model_path = maybe_download_from_modelscope(model)
assert self.dataset_path is not None and self.model_path is not None, \
f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
self.port = port
self.task_type = aisbench_config["case_type"]
self.request_conf = aisbench_config["request_conf"]
@@ -254,3 +261,52 @@ def run_aisbench_cases(model, port, aisbench_cases):
def get_TTFT(result):
TTFT = result[0][0].loc["TTFT", "Average"][:-3]
return float(TTFT)
temp_dir = tempfile.gettempdir()
def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None):
lock_dir = cache_dir or temp_dir
model_name_or_path = str(model_name_or_path)
os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
model_name = model_name_or_path.replace("/", "-")
hash_name = hashlib.sha256(model_name.encode()).hexdigest()
# add hash to avoid conflict with old users' lock files
lock_file_name = hash_name + model_name + ".lock"
# mode 0o666 is required for the filelock to be shared across users
lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name),
mode=0o666)
return lock
def maybe_download_from_modelscope(
model: str,
repo_type: str | None = None,
revision: str | None = None,
download_dir: str | None = None,
ignore_patterns: str | list[str] | None = None,
allow_patterns: list[str] | str | None = None,
) -> str | None:
"""
Download model/dataset from ModelScope hub.
Returns the path to the downloaded model, or None if the model is not
downloaded from ModelScope.
"""
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with get_lock(model, download_dir):
if not os.path.exists(model):
model_path = snapshot_download(
model_id=model,
repo_type=repo_type,
cache_dir=download_dir,
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
revision=revision,
ignore_file_pattern=ignore_patterns,
allow_patterns=allow_patterns,
)
else:
model_path = model
return model_path
return None