[CPU] enable CI for PRs, add Dockerfile and auto build task (#6458)

Co-authored-by: diwei sun <diwei.sun@intel.com> Co-authored-by: Yineng Zhang <me@zhyncs.com>
2025-06-06 04:43:54 +08:00
parent 8b2474898b
commit 562f279a2d
6 changed files with 239 additions and 2 deletions
--- a/.github/workflows/pr-test-xeon.yml
+++ b/.github/workflows/pr-test-xeon.yml
@@ -0,0 +1,86 @@
 # CI for the Xeon (CPU) backend: builds docker/Dockerfile.xeon and runs the
 # per-commit-cpu test suite inside the resulting container.
 name: PR Test (Xeon)
 on:
  # Runs for pull requests that target main.
  pull_request:
    branches:
      - main
  # Manual trigger. NOTE(review): build-test and unit-test are both guarded by
  # `github.event_name == 'pull_request'`, so a manual run only executes `finish`.
  workflow_dispatch:
 # One active run per ref; a newer run cancels the one still in progress.
 concurrency:
  group: pr-test-xeon-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Builds the Xeon image locally on the self-hosted runner under the tag
  # `sglang_xeon`. Nothing is pushed to a registry from this job.
  build-test:
    if: github.event_name == 'pull_request'
    runs-on: sgl-kernel-build-node
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Build image
        run: |
          # --no-cache forces every layer to be rebuilt on each run.
          docker build . -f docker/Dockerfile.xeon -t sglang_xeon --no-cache
  # Starts a container from the `sglang_xeon` image, reinstalls sglang and
  # sgl-kernel from the mounted workspace, and runs the per-commit-cpu suite.
  unit-test:
    if: github.event_name == 'pull_request'
    needs: [build-test]
    # NOTE(review): this job has no checkout or build step of its own; it uses
    # the sglang_xeon image and the workspace directory, so it presumably
    # depends on landing on the same runner host as build-test - confirm.
    runs-on: sgl-kernel-build-node
    steps:
      - name: Run container
        run: |
          docker run -dt \
            -v "${{ github.workspace }}:/sglang-checkout/" --ipc=host \
            --name ci_sglang_xeon \
            sglang_xeon
      - name: Install Dependency
        timeout-minutes: 20
        run: |
          docker exec ci_sglang_xeon bash -c "python3 -m pip install --upgrade pip"
          # Best effort: the package may not be installed, so ignore a failure here.
          docker exec ci_sglang_xeon pip uninstall sgl-kernel -y || true
          docker exec -w /sglang-checkout/sgl-kernel ci_sglang_xeon bash -c "cp pyproject_cpu.toml pyproject.toml && pip install -v ."
          # Single quotes inside the double-quoted command keep the extras
          # spec `python[all_cpu]` from being treated as a glob pattern.
          docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c "pip install -e 'python[all_cpu]'"
          docker exec ci_sglang_xeon bash -c "python3 -m pip install pytest expecttest"
      - name: Check AMX Support
        id: check_amx
        timeout-minutes: 5
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '"
        # A failed check must not fail the job; it only gates the test step below.
        continue-on-error: true
      - name: Run UT Cases
        # Skipped (without failing the job) when the AMX check did not pass.
        if: steps.check_amx.outcome == 'success'
        timeout-minutes: 20
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu"
      - name: Cleanup container
        # Always remove the fixed-name container so the next run can reuse the name.
        if: always()
        run: |
          docker rm -f ci_sglang_xeon || true
  # Aggregation job: always runs and fails when any job it depends on
  # reported `failure` or `cancelled`.
  finish:
    needs: [build-test, unit-test]
    if: always()
    runs-on: ubuntu-24.04
    steps:
      - name: Check all dependent job statuses
        run: |
          # The expression expands to a space-separated list of job results.
          for status in ${{ join(needs.*.result, ' ') }}; do
            case "$status" in
              failure|cancelled)
                echo "Job failed with result: $status"
                exit 1
                ;;
            esac
          done
          echo "All jobs completed successfully"
          exit 0
--- a/.github/workflows/release-docker-xeon.yml
+++ b/.github/workflows/release-docker-xeon.yml
@@ -0,0 +1,35 @@
 # Builds the Xeon (CPU) image from docker/Dockerfile.xeon and pushes it to
 # Docker Hub.
 name: Release Docker Images
 on:
  # Runs when the version file changes on main.
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  # Also available as a manual trigger.
  workflow_dispatch:
 jobs:
  # Builds docker/Dockerfile.xeon and pushes it as
  # lmsysorg/sglang:v<version>-xeon, where <version> is read from
  # python/sglang/version.py.
  publish:
    # Skip the job unless it runs in the sgl-project/sglang repository.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and Push
        run: |
          # Take the text between the first pair of double quotes as the version.
          version=$(cut -d'"' -f2 python/sglang/version.py)
          # Refuse to publish a malformed tag (e.g. "v-xeon") if nothing was read.
          if [ -z "${version}" ]; then
            echo "Could not read a version from python/sglang/version.py" >&2
            exit 1
          fi
          tag="v${version}-xeon"
          docker build . -f docker/Dockerfile.xeon -t "lmsysorg/sglang:${tag}" --no-cache
          docker push "lmsysorg/sglang:${tag}"
--- a/docker/Dockerfile.xeon
+++ b/docker/Dockerfile.xeon
@@ -0,0 +1,44 @@
 # CPU (Xeon) image for sglang: Ubuntu 24.04 + Miniforge, with sglang and the
 # CPU build of sgl-kernel installed from a fresh clone of the repository.
 FROM ubuntu:24.04
 SHELL ["/bin/bash", "-c"]
 # Git ref of sglang to check out, and the torch/torchvision versions to pin.
 ARG VER_SGLANG=main
 ARG VER_TORCH=2.6.0
 ARG VER_TORCHVISION=0.21.0
 # DEBIAN_FRONTEND is set on both apt commands so neither can stop for input.
 RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get full-upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
    ca-certificates \
    git \
    curl \
    wget \
    vim \
    gcc \
    g++ \
    make
 WORKDIR /sgl-workspace
 # Install Miniforge into ./miniforge3 and add the conda packages whose
 # libraries are preloaded via LD_PRELOAD below. Only -o is passed to curl:
 # a single URL takes a single output option.
 RUN curl -fsSL -v -o miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.11.3-2/Miniforge3-24.11.3-2-Linux-x86_64.sh && \
    bash miniforge.sh -b -p ./miniforge3 && \
    rm -f miniforge.sh && \
    . miniforge3/bin/activate && \
    conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl
 ENV PATH=/sgl-workspace/miniforge3/bin:/sgl-workspace/miniforge3/condabin:${PATH}
 # Silence pip's warning about running as root inside the container.
 ENV PIP_ROOT_USER_ACTION=ignore
 RUN pip install intel-openmp
 # Install sglang in editable mode, then force the pinned CPU wheels of
 # torch/torchvision over whatever the extras pulled in, and finally build
 # sgl-kernel using its CPU pyproject.
 RUN git clone https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    git checkout ${VER_SGLANG} && \
    pip install -e "python[all_cpu]" && \
    pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} --index-url https://download.pytorch.org/whl/cpu --force-reinstall && \
    cd sgl-kernel && \
    cp pyproject_cpu.toml pyproject.toml && \
    pip install -v .
 # Preload libiomp5, libtcmalloc and libtbbmalloc from the Miniforge lib dir.
 ENV LD_PRELOAD=/sgl-workspace/miniforge3/lib/libiomp5.so:/sgl-workspace/miniforge3/lib/libtcmalloc.so:/sgl-workspace/miniforge3/lib/libtbbmalloc.so.2
 WORKDIR /sgl-workspace/sglang
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -89,7 +89,7 @@ srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
 # CPU: currently, there are no pre-built vllm wheels for CPU.
 # To install vllm for CPU, please follow the instruction here:
 # https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
-srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "torch"]
+srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "einops"]
 # https://vllm-ascend.readthedocs.io/en/latest/installation.html
 srt_npu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -26,6 +26,7 @@ from sglang.lang.backend.openai import OpenAI
 from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.srt.utils import (
    get_bool_env_var,
    get_device,
    is_port_available,
    kill_process_tree,
    retry,
@@ -305,13 +306,33 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
    return args
 def auto_config_device() -> str:
    """Return the device reported by ``get_device()``, or ``"cpu"`` on failure.

    A ``RuntimeError`` or ``ImportError`` raised by ``get_device()`` is not
    propagated: a warning is printed and ``"cpu"`` is returned instead.
    """
    try:
        return get_device()
    except (RuntimeError, ImportError) as e:
        print(f"Warning: {e} - Falling back to CPU")
        return "cpu"
 def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
    """Register the shared sglang command-line options and parse the arguments.

    Args:
        parser: Parser to extend; the options are added to it in place.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    # (flag, add_argument keyword options), kept in registration order.
    option_specs = (
        ("--parallel", {"type": int, "default": 64}),
        ("--host", {"type": str, "default": "http://127.0.0.1"}),
        ("--port", {"type": int, "default": 30000}),
        ("--backend", {"type": str, "default": "srt"}),
        (
            "--device",
            {
                "type": str,
                "default": "auto",
                "choices": ["auto", "cuda", "rocm", "cpu"],
                "help": "Device type (auto/cuda/rocm/cpu). Auto will detect available platforms",
            },
        ),
        ("--result-file", {"type": str, "default": "result.jsonl"}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
@@ -397,11 +418,25 @@ def popen_launch_server(
    base_url: str,
    timeout: float,
    api_key: Optional[str] = None,
-    other_args: list[str] = (),
+    other_args: list[str] = [],
    env: Optional[dict] = None,
    return_stdout_stderr: Optional[tuple] = None,
    device: str = "auto",
    pd_separated: bool = False,
 ):
    """Launch a server process with automatic device detection.
    Args:
        device: Device type ("auto", "cuda", "rocm" or "cpu").
                If "auto", will detect available platforms automatically.
    """
    # Auto-detect device if needed
    if device == "auto":
        device = auto_config_device()
        print(f"Auto-configed device: {device}", flush=True)
        other_args = list(other_args)
        other_args += ["--device", str(device)]
    _, host, port = base_url.split(":")
    host = host[2:]
@@ -457,6 +492,15 @@ def popen_launch_server(
    start_time = time.perf_counter()
    with requests.Session() as session:
        while time.perf_counter() - start_time < timeout:
            return_code = process.poll()
            if return_code is not None:
                # Server failed to start (non-zero exit code) or crashed
                raise Exception(
                    f"Server process exited with code {return_code}. "
                    "Check server logs for errors."
                )
            try:
                headers = {
                    "Content-Type": "application/json; charset=utf-8",
@@ -627,6 +671,7 @@ def get_benchmark_args(
    disable_stream=False,
    disable_ignore_eos=False,
    seed: int = 0,
    device="auto",
    pd_separated: bool = False,
 ):
    return SimpleNamespace(
@@ -657,6 +702,7 @@ def get_benchmark_args(
        profile=None,
        lora_name=None,
        prompt_suffix="",
        device=device,
        pd_separated=pd_separated,
    )
@@ -676,7 +722,10 @@ def run_bench_serving(
    disable_ignore_eos=False,
    need_warmup=False,
    seed: int = 0,
    device="auto",
 ):
    if device == "auto":
        device = auto_config_device()
    # Launch the server
    base_url = DEFAULT_URL_FOR_TEST
    process = popen_launch_server(
@@ -700,6 +749,7 @@ def run_bench_serving(
        disable_stream=disable_stream,
        disable_ignore_eos=disable_ignore_eos,
        seed=seed,
        device=device,
    )
    try:
@@ -750,6 +800,18 @@ def run_bench_serving_multi(
 def run_bench_one_batch(model, other_args):
    """Launch a offline process with automatic device detection.
    Args:
        device: Device type ("auto", "cuda", "rocm" or "cpu").
                If "auto", will detect available platforms automatically.
    """
    # Auto-detect device if needed
    device = auto_config_device()
    print(f"Auto-configed device: {device}", flush=True)
    other_args += ["--device", str(device)]
    command = [
        "python3",
        "-m",
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -127,6 +127,16 @@ suites = {
    "per-commit-8-gpu-amd": [
        TestFile("test_full_deepseek_v3.py", 250),
    ],
    "per-commit-cpu": [
        TestFile("cpu/test_activation.py"),
        TestFile("cpu/test_decode.py"),
        TestFile("cpu/test_extend.py"),
        TestFile("cpu/test_gemm.py"),
        TestFile("cpu/test_moe.py"),
        TestFile("cpu/test_norm.py"),
        TestFile("cpu/test_qkv_proj_with_rope.py"),
        TestFile("cpu/test_shared_expert.py"),
    ],
    "nightly": [
        TestFile("test_nightly_gsm8k_eval.py"),
    ],