[v0.18.0][CI] Add rank0 process count check for DeepSeek-R1-W8A8-HBM test (#8072)
### What this PR does / why we need it?

Adds a `check_rank0_process_count` validation step to the DeepSeek-R1-W8A8-HBM nightly single-node test. The check verifies that after the server starts, there is **exactly 1** `vllm serve` process running on rank0. This guards against the regression fixed in #8041 (extra NPU context leaking on device 0), ensuring it does not silently reappear in future releases.

#### Changes

- **`tests/e2e/nightly/single_node/models/scripts/test_single_node.py`**: Add a `run_check_rank0_process_count` async handler. It calls `npu-smi info` for diagnostics, then uses `psutil` to assert that exactly one `vllm serve` process exists on rank0.
- **`tests/e2e/nightly/single_node/models/configs/DeepSeek-R1-W8A8-HBM.yaml`**: Register `check_rank0_process_count` in the `test_content` list for the HBM test case.

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
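For readers who want to reproduce the check outside the test harness, here is a minimal standalone sketch of the same logic. The helper name `count_vllm_serve_processes` and the `__main__` guard are illustrative additions; the `psutil` filtering mirrors the handler added in this commit (see the diff below).

```python
# Minimal standalone sketch of the rank0 process-count check. The helper
# name and the __main__ guard are illustrative; the filtering logic mirrors
# the run_check_rank0_process_count handler added in this commit.
import psutil


def count_vllm_serve_processes() -> int:
    """Count processes whose command line mentions both 'vllm' and 'serve'."""
    return sum(
        1
        for p in psutil.process_iter(attrs=["pid", "cmdline"], ad_value=None)
        if p.info["cmdline"]
        and any("vllm" in arg for arg in p.info["cmdline"])
        and any("serve" in arg for arg in p.info["cmdline"])
    )


if __name__ == "__main__":
    count = count_vllm_serve_processes()
    # After server startup, exactly one `vllm serve` process is expected on rank0.
    assert count == 1, f"expected exactly 1 vllm serve process, found {count}"
    print(f"rank0 process count check passed: found {count} process")
```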
```diff
--- a/tests/e2e/nightly/single_node/models/configs/DeepSeek-R1-W8A8-HBM.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/DeepSeek-R1-W8A8-HBM.yaml
@@ -39,4 +39,7 @@ test_cases:
       - "--enforce-eager"
       - "--additional-config"
       - '{"ascend_scheduler_config": {"enabled": false}, "torchair_graph_config": {"enabled": false, "enable_multistream_shared_expert": false}}'
+    test_content:
+      - completion
+      - check_rank0_process_count
     benchmarks:
```
```diff
--- a/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
+++ b/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
@@ -1,7 +1,9 @@
+import asyncio
 import logging
 from typing import Any
 
 import openai
+import psutil
 import pytest
 import subprocess
 import sys
@@ -107,11 +109,36 @@ def run_benchmark_comparisons(config: SingleNodeConfig, results: Any) -> None:
         print(f"✅ Comparison passed: {eval_str} [threshold: {expected_threshold}]")
 
 
+async def run_check_rank0_process_count(config: SingleNodeConfig, server: "RemoteOpenAIServer | DisaggEpdProxy") -> None:
+    proc = await asyncio.create_subprocess_exec(
+        "npu-smi", "info",
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout_bytes, stderr_bytes = await proc.communicate()
+    if proc.returncode == 0:
+        logger.info("npu-smi info:\n%s", stdout_bytes.decode(errors='ignore'))
+    else:
+        logger.warning("npu-smi info failed: %s", stderr_bytes.decode(errors='ignore'))
+
+    vllm_serve_procs = [
+        p for p in psutil.process_iter(attrs=["pid", "cmdline"], ad_value=None)
+        if p.info["cmdline"]
+        and any("vllm" in arg for arg in p.info["cmdline"])
+        and any("serve" in arg for arg in p.info["cmdline"])
+    ]
+    count = len(vllm_serve_procs)
+    assert count == 1, (
+        f"rank0 process count check failed: expected exactly 1 vllm serve process on rank0, found {count}"
+    )
+
+
 # Extend this dictionary to add new test capabilities
 TEST_HANDLERS = {
     "completion": run_completion_test,
     "image": run_image_test,
     "chat_completion": run_chat_completion_test,
+    "check_rank0_process_count": run_check_rank0_process_count,
 }
 
 
```
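For context on how the new handler plugs in: the `# Extend this dictionary` comment above suggests that `TEST_HANDLERS` maps each name listed under `test_content` in the YAML config to a coroutine. Below is a hedged sketch of what such a dispatch loop could look like; the function name, signatures, and loop are assumptions for illustration, not the repository's actual runner.

```python
# Hypothetical dispatch sketch (an assumption, not the repository's actual
# runner): each name under `test_content` in the YAML config selects a
# handler, which is awaited with the shared config and server handle.
from typing import Any, Awaitable, Callable

Handler = Callable[[Any, Any], Awaitable[None]]


async def run_test_content(
    config: Any,
    server: Any,
    test_content: list[str],
    handlers: dict[str, Handler],
) -> None:
    for name in test_content:     # e.g. ["completion", "check_rank0_process_count"]
        handler = handlers[name]  # a KeyError here signals an unregistered test name
        await handler(config, server)
```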