[CI] Fix nightly CI (#3821)

### What this PR does / why we need it?
This patch fixes the nightly CI run
[failure](https://github.com/vllm-project/vllm-ascend/actions/runs/18848144365).

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.11.0rc3
- vLLM main:
https://github.com/vllm-project/vllm/commit/releases/v0.11.1

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-28 20:40:03 +08:00
committed by GitHub
parent a7450db1bd
commit 90ae114569
6 changed files with 79 additions and 24 deletions

View File

@@ -84,16 +84,17 @@ class MultiNodeConfig:
self.envs["LOCAL_IP"] = self.cur_ip
self.envs["NIC_NAME"] = self.nic_name
master_ip = self.cluster_ips[0]
if self.disaggregated_prefill:
self.envs[
"DISAGGREGATED_PREFILL_RANK_TABLE_PATH"] = self.disaggregated_prefill.get(
"ranktable_path")
if self.cur_index < self.decode_start_index:
self.envs["MASTER_IP"] = self.cluster_ips[0]
master_ip = self.cluster_ips[0]
else:
self.envs["MASTER_IP"] = self.cluster_ips[
self.decode_start_index]
master_ip = self.cluster_ips[self.decode_start_index]
self.envs["MASTER_IP"] = master_ip
ascend_path = "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages"
self.envs[
"LD_LIBRARY_PATH"] = f"{ascend_path}:{self.envs.get('LD_LIBRARY_PATH', os.environ.get('LD_LIBRARY_PATH', ''))}"
@@ -288,8 +289,3 @@ class MultiNodeConfig:
subprocess.run(cmd, env=env, check=True)
assert os.path.exists(
str(ranktable_path)), "failed generate ranktable.json"
if __name__ == '__main__':
config = MultiNodeConfig.from_yaml()
print(config.perf_cmd)

View File

@@ -121,7 +121,7 @@ download_go() {
}
install_ais_bench() {
local AIS_BENCH="$SRC_DIR/benchmark"
local AIS_BENCH="$SRC_DIR/vllm-ascend/benchmark"
git clone https://gitee.com/aisbench/benchmark.git $AIS_BENCH
cd $AIS_BENCH
git checkout v3.0-20250930-master
@@ -166,8 +166,8 @@ run_tests() {
kill_npu_processes
ret=$?
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
mkdir -p "$(dirname "$RESULT_PATH")"
echo $ret > "$RESULT_PATH"
mkdir -p "$(dirname "$RESULT_FILE_PATH")"
echo $ret > "$RESULT_FILE_PATH"
fi
return $ret
}

View File

@@ -48,7 +48,7 @@ def get_local_model_path_with_retry(
async def get_completions(url: str, model: str, prompts: Union[str, List[str]],
**api_kwargs: Any) -> List[str]:
"""
Asynchronously send HTTP requests to a /v1/completions endpoint.
Asynchronously send HTTP requests to endpoint.
Args:
url: Full endpoint URL, e.g. "http://localhost:1025/v1/completions"
@@ -88,7 +88,10 @@ async def get_completions(url: str, model: str, prompts: Union[str, List[str]],
@pytest.mark.asyncio
async def test_multi_node() -> None:
config = MultiNodeConfig.from_yaml()
# To avoid modelscope 400 HttpError, we should download the model with retry
local_model_path = get_local_model_path_with_retry(config.model)
config.server_cmd = config.server_cmd.replace(config.model,
local_model_path)
assert local_model_path is not None, "can not find any local weight for test"
env_dict = config.envs
perf_cmd = config.perf_cmd
@@ -113,11 +116,6 @@ async def test_multi_node() -> None:
) as remote_server:
if config.is_master:
port = proxy_port if disaggregated_prefill else server_port
base_url = f"http://localhost:{port}/v1/completions"
_ = await get_completions(url=base_url,
model=local_model_path,
prompts=prompts,
api_kwargs=api_keyword_args)
# aisbench test
if acc_cmd:
run_aisbench_cases(local_model_path, port, acc_cmd)