[CPU] enable CI for PRs, add Dockerfile and auto build task (#6458)

Co-authored-by: diwei sun <diwei.sun@intel.com>
Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
Zaili Wang
2025-06-06 04:43:54 +08:00
committed by GitHub
parent 8b2474898b
commit 562f279a2d
6 changed files with 239 additions and 2 deletions

View File

@@ -89,7 +89,7 @@ srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
# CPU: currently, there are no pre-built vllm wheels for CPU.
# To install vllm for CPU, please follow the instructions here:
# https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "torch"]
srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "einops"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]

View File

@@ -26,6 +26,7 @@ from sglang.lang.backend.openai import OpenAI
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.srt.utils import (
get_bool_env_var,
get_device,
is_port_available,
kill_process_tree,
retry,
@@ -305,13 +306,33 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
return args
def auto_config_device() -> str:
    """Return the device reported by ``get_device()``, or ``"cpu"`` on failure.

    If ``get_device()`` raises ``RuntimeError`` or ``ImportError``, a warning
    containing the exception text is printed and ``"cpu"`` is returned
    instead. Any other exception propagates to the caller.
    """
    try:
        return get_device()
    except (RuntimeError, ImportError) as err:
        print(f"Warning: {err} - Falling back to CPU")
    return "cpu"
def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
    """Register the shared sglang command-line options and parse the arguments.

    Adds ``--parallel``, ``--host``, ``--port``, ``--backend``, ``--device``
    and ``--result-file`` to ``parser`` (in that order), then returns the
    namespace produced by ``parser.parse_args()``.
    """
    # Options that only need a flag, a type and a default value.
    plain_options = (
        ("--parallel", int, 64),
        ("--host", str, "http://127.0.0.1"),
        ("--port", int, 30000),
        ("--backend", str, "srt"),
    )
    for flag, value_type, fallback in plain_options:
        parser.add_argument(flag, type=value_type, default=fallback)

    # --device is restricted to a fixed set of choices; "auto" is the default.
    device_choices = ["auto", "cuda", "rocm", "cpu"]
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        choices=device_choices,
        help="Device type (auto/cuda/rocm/cpu). Auto will detect available platforms",
    )
    parser.add_argument("--result-file", type=str, default="result.jsonl")

    return parser.parse_args()
@@ -397,11 +418,25 @@ def popen_launch_server(
base_url: str,
timeout: float,
api_key: Optional[str] = None,
other_args: list[str] = (),
other_args: list[str] = [],
env: Optional[dict] = None,
return_stdout_stderr: Optional[tuple] = None,
device: str = "auto",
pd_separated: bool = False,
):
"""Launch a server process with automatic device detection.
Args:
device: Device type ("auto", "cuda", "rocm" or "cpu").
If "auto", will detect available platforms automatically.
"""
# Auto-detect device if needed
if device == "auto":
device = auto_config_device()
print(f"Auto-configed device: {device}", flush=True)
other_args = list(other_args)
other_args += ["--device", str(device)]
_, host, port = base_url.split(":")
host = host[2:]
@@ -457,6 +492,15 @@ def popen_launch_server(
start_time = time.perf_counter()
with requests.Session() as session:
while time.perf_counter() - start_time < timeout:
return_code = process.poll()
if return_code is not None:
# Server failed to start (non-zero exit code) or crashed
raise Exception(
f"Server process exited with code {return_code}. "
"Check server logs for errors."
)
try:
headers = {
"Content-Type": "application/json; charset=utf-8",
@@ -627,6 +671,7 @@ def get_benchmark_args(
disable_stream=False,
disable_ignore_eos=False,
seed: int = 0,
device="auto",
pd_separated: bool = False,
):
return SimpleNamespace(
@@ -657,6 +702,7 @@ def get_benchmark_args(
profile=None,
lora_name=None,
prompt_suffix="",
device=device,
pd_separated=pd_separated,
)
@@ -676,7 +722,10 @@ def run_bench_serving(
disable_ignore_eos=False,
need_warmup=False,
seed: int = 0,
device="auto",
):
if device == "auto":
device = auto_config_device()
# Launch the server
base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server(
@@ -700,6 +749,7 @@ def run_bench_serving(
disable_stream=disable_stream,
disable_ignore_eos=disable_ignore_eos,
seed=seed,
device=device,
)
try:
@@ -750,6 +800,18 @@ def run_bench_serving_multi(
def run_bench_one_batch(model, other_args):
"""Launch a offline process with automatic device detection.
Args:
device: Device type ("auto", "cuda", "rocm" or "cpu").
If "auto", will detect available platforms automatically.
"""
# Auto-detect device if needed
device = auto_config_device()
print(f"Auto-configed device: {device}", flush=True)
other_args += ["--device", str(device)]
command = [
"python3",
"-m",