[CPU] enable CI for PRs, add Dockerfile and auto build task (#6458)

Co-authored-by: diwei sun <diwei.sun@intel.com> Co-authored-by: Yineng Zhang <me@zhyncs.com>
2025-06-06 04:43:54 +08:00
parent 8b2474898b
commit 562f279a2d
6 changed files with 239 additions and 2 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -89,7 +89,7 @@ srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
 # CPU: currently, there are no pre-built vllm wheels for CPU.
 # To install vllm for CPU, please follow the instruction here:
 # https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
-srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "torch"]
+srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "einops"]
 # https://vllm-ascend.readthedocs.io/en/latest/installation.html
 srt_npu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]

--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -26,6 +26,7 @@ from sglang.lang.backend.openai import OpenAI
 from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.srt.utils import (
    get_bool_env_var,
+    get_device,
    is_port_available,
    kill_process_tree,
    retry,
@@ -305,13 +306,33 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
    return args


+def auto_config_device() -> str:
+    """Return the device reported by get_device(), or "cpu" if it raises RuntimeError/ImportError."""
+
+    try:
+        device = get_device()
+    except (RuntimeError, ImportError) as e:
+        print(f"Warning: {e} - Falling back to CPU")
+        device = "cpu"
+
+    return device
+
+
 def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
+    """Add the common sglang CLI options to ``parser``, parse the command line, and return the args."""
    parser.add_argument("--parallel", type=int, default=64)
    parser.add_argument("--host", type=str, default="http://127.0.0.1")
    parser.add_argument("--port", type=int, default=30000)
    parser.add_argument("--backend", type=str, default="srt")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="auto",
+        choices=["auto", "cuda", "rocm", "cpu"],
+        help="Device type (auto/cuda/rocm/cpu). Auto will detect available platforms",
+    )
    parser.add_argument("--result-file", type=str, default="result.jsonl")
    args = parser.parse_args()
+
    return args


@@ -397,11 +418,25 @@ def popen_launch_server(
    base_url: str,
    timeout: float,
    api_key: Optional[str] = None,
-    other_args: list[str] = (),
+    other_args: list[str] = [],
    env: Optional[dict] = None,
    return_stdout_stderr: Optional[tuple] = None,
+    device: str = "auto",
    pd_separated: bool = False,
 ):
+    """Launch a server process with automatic device detection.
+
+    Args:
+        device: Device type ("auto", "cuda", "rocm" or "cpu").
+                If "auto", will detect available platforms automatically.
+    """
+    # Auto-detect device if needed
+    if device == "auto":
+        device = auto_config_device()
+        print(f"Auto-configed device: {device}", flush=True)
+        other_args = list(other_args)
+        other_args += ["--device", str(device)]
+
    _, host, port = base_url.split(":")
    host = host[2:]

@@ -457,6 +492,15 @@ def popen_launch_server(
    start_time = time.perf_counter()
    with requests.Session() as session:
        while time.perf_counter() - start_time < timeout:
+
+            return_code = process.poll()
+            if return_code is not None:
+                # Server failed to start (non-zero exit code) or crashed
+                raise Exception(
+                    f"Server process exited with code {return_code}. "
+                    "Check server logs for errors."
+                )
+
            try:
                headers = {
                    "Content-Type": "application/json; charset=utf-8",
@@ -627,6 +671,7 @@ def get_benchmark_args(
    disable_stream=False,
    disable_ignore_eos=False,
    seed: int = 0,
+    device="auto",
    pd_separated: bool = False,
 ):
    return SimpleNamespace(
@@ -657,6 +702,7 @@ def get_benchmark_args(
        profile=None,
        lora_name=None,
        prompt_suffix="",
+        device=device,
        pd_separated=pd_separated,
    )

@@ -676,7 +722,10 @@ def run_bench_serving(
    disable_ignore_eos=False,
    need_warmup=False,
    seed: int = 0,
+    device="auto",
 ):
+    if device == "auto":
+        device = auto_config_device()
    # Launch the server
    base_url = DEFAULT_URL_FOR_TEST
    process = popen_launch_server(
@@ -700,6 +749,7 @@ def run_bench_serving(
        disable_stream=disable_stream,
        disable_ignore_eos=disable_ignore_eos,
        seed=seed,
+        device=device,
    )

    try:
@@ -750,6 +800,18 @@ def run_bench_serving_multi(


 def run_bench_one_batch(model, other_args):
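+    """Launch an offline process with automatic device detection.
+
+    The device is always resolved via auto_config_device() (there is no
+    ``device`` parameter) and appended to ``other_args`` as ``--device <name>``.
+    ``other_args`` is extended with ``+=``, so a list passed in is modified.
+    """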
+    # Always auto-detect the device; this function takes no device argument
+
+    device = auto_config_device()
+    print(f"Auto-configed device: {device}", flush=True)
+    other_args += ["--device", str(device)]
+
    command = [
        "python3",
        "-m",