diff --git a/.github/workflows/pr-test-xeon.yml b/.github/workflows/pr-test-xeon.yml new file mode 100644 index 000000000..e5d9bc161 --- /dev/null +++ b/.github/workflows/pr-test-xeon.yml @@ -0,0 +1,86 @@ +name: PR Test (Xeon) +on: + pull_request: + branches: + - main + workflow_dispatch: + +concurrency: + group: pr-test-xeon-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-test: + if: github.event_name == 'pull_request' + runs-on: sgl-kernel-build-node + environment: 'prod' + strategy: + matrix: + build_type: ['all'] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Build and Push + run: | + version=$(cat python/sglang/version.py | cut -d'"' -f2) + tag=v${version}-xeon + + docker build . -f docker/Dockerfile.xeon -t sglang_xeon --no-cache + unit-test: + if: github.event_name == 'pull_request' + needs: [build-test] + runs-on: sgl-kernel-build-node + steps: + - name: Run container + run: | + docker run -dt \ + -v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \ + --name ci_sglang_xeon \ + sglang_xeon + + - name: Install Dependency + timeout-minutes: 20 + run: | + docker exec ci_sglang_xeon bash -c "python3 -m pip install --upgrade pip" + docker exec ci_sglang_xeon pip uninstall sgl-kernel -y || true + docker exec -w /sglang-checkout/sgl-kernel ci_sglang_xeon bash -c "cp pyproject_cpu.toml pyproject.toml && pip install -v ." 
+ docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c 'pip install -e "python[all_cpu]"' + docker exec ci_sglang_xeon bash -c "python3 -m pip install pytest expecttest" + + - name: Check AMX Support + id: check_amx + timeout-minutes: 5 + run: | + docker exec -w /sglang-checkout/ ci_sglang_xeon \ + bash -c "python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '" + continue-on-error: true + + - name: Run UT Cases + if: steps.check_amx.outcome == 'success' + timeout-minutes: 20 + run: | + docker exec -w /sglang-checkout/ ci_sglang_xeon \ + bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu" + + - name: Cleanup container + if: always() + run: | + docker rm -f ci_sglang_xeon || true + + finish: + if: always() + needs: [build-test, unit-test] + runs-on: ubuntu-24.04 + steps: + - name: Check all dependent job statuses + run: | + results=(${{ join(needs.*.result, ' ') }}) + for result in "${results[@]}"; do + if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then + echo "Job failed with result: $result" + exit 1 + fi + done + echo "All jobs completed successfully" + exit 0 diff --git a/.github/workflows/release-docker-xeon.yml b/.github/workflows/release-docker-xeon.yml new file mode 100644 index 000000000..118a1392b --- /dev/null +++ b/.github/workflows/release-docker-xeon.yml @@ -0,0 +1,35 @@ +name: Release Docker Images (Xeon) +on: + push: + branches: + - main + paths: + - "python/sglang/version.py" + workflow_dispatch: + +jobs: + publish: + if: github.repository == 'sgl-project/sglang' + runs-on: ubuntu-24.04 + environment: 'prod' + strategy: + matrix: + build_type: ['all'] + steps: + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and Push + run: | + 
version=$(cat python/sglang/version.py | cut -d'"' -f2) + tag=v${version}-xeon + + docker build . -f docker/Dockerfile.xeon -t lmsysorg/sglang:${tag} --no-cache + docker push lmsysorg/sglang:${tag} diff --git a/docker/Dockerfile.xeon b/docker/Dockerfile.xeon new file mode 100644 index 000000000..d622aa736 --- /dev/null +++ b/docker/Dockerfile.xeon @@ -0,0 +1,44 @@ +FROM ubuntu:24.04 +SHELL ["/bin/bash", "-c"] + +ARG VER_SGLANG=main +ARG VER_TORCH=2.6.0 +ARG VER_TORCHVISION=0.21.0 + +RUN apt-get update && \ + apt-get full-upgrade -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + ca-certificates \ + git \ + curl \ + wget \ + vim \ + gcc \ + g++ \ + make + +WORKDIR /sgl-workspace + +RUN curl -fsSL -v -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/24.11.3-2/Miniforge3-24.11.3-2-Linux-x86_64.sh && \ + bash miniforge.sh -b -p ./miniforge3 && \ + rm -f miniforge.sh && \ + . miniforge3/bin/activate && \ + conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl + +ENV PATH=/sgl-workspace/miniforge3/bin:/sgl-workspace/miniforge3/condabin:${PATH} +ENV PIP_ROOT_USER_ACTION=ignore + +RUN pip install intel-openmp + +RUN git clone https://github.com/sgl-project/sglang.git && \ + cd sglang && \ + git checkout ${VER_SGLANG} && \ + pip install -e "python[all_cpu]" && \ + pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} --index-url https://download.pytorch.org/whl/cpu --force-reinstall && \ + cd sgl-kernel && \ + cp pyproject_cpu.toml pyproject.toml && \ + pip install -v . 
+ +ENV LD_PRELOAD=/sgl-workspace/miniforge3/lib/libiomp5.so:/sgl-workspace/miniforge3/lib/libtcmalloc.so:/sgl-workspace/miniforge3/lib/libtbbmalloc.so.2 + +WORKDIR /sgl-workspace/sglang diff --git a/python/pyproject.toml b/python/pyproject.toml index ae8b828ff..7aaaa7de9 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -89,7 +89,7 @@ srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"] # CPU: currently, there are no pre-built vllm wheels for CPU. # To install vllm for CPU, please follow the instruction here: # https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html -srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "torch"] +srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "einops"] # https://vllm-ascend.readthedocs.io/en/latest/installation.html srt_npu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"] diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index f1e60c8f7..607a53b63 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -26,6 +26,7 @@ from sglang.lang.backend.openai import OpenAI from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint from sglang.srt.utils import ( get_bool_env_var, + get_device, is_port_available, kill_process_tree, retry, @@ -305,13 +306,33 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser): return args +def auto_config_device() -> str: + """Auto-config available device platform""" + + try: + device = get_device() + except (RuntimeError, ImportError) as e: + print(f"Warning: {e} - Falling back to CPU") + device = "cpu" + + return device + + def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser): parser.add_argument("--parallel", type=int, default=64) parser.add_argument("--host", type=str, default="http://127.0.0.1") parser.add_argument("--port", type=int, default=30000) parser.add_argument("--backend", type=str, default="srt") + 
parser.add_argument( + "--device", + type=str, + default="auto", + choices=["auto", "cuda", "rocm", "cpu"], + help="Device type (auto/cuda/rocm/cpu). Auto will detect available platforms", + ) parser.add_argument("--result-file", type=str, default="result.jsonl") args = parser.parse_args() + return args @@ -397,11 +418,25 @@ def popen_launch_server( base_url: str, timeout: float, api_key: Optional[str] = None, - other_args: list[str] = (), + other_args: list[str] = [], env: Optional[dict] = None, return_stdout_stderr: Optional[tuple] = None, + device: str = "auto", pd_separated: bool = False, ): + """Launch a server process with automatic device detection. + + Args: + device: Device type ("auto", "cuda", "rocm" or "cpu"). + If "auto", will detect available platforms automatically. + """ + # Auto-detect device if needed + if device == "auto": + device = auto_config_device() + print(f"Auto-configed device: {device}", flush=True) + other_args = list(other_args) + other_args += ["--device", str(device)] + _, host, port = base_url.split(":") host = host[2:] @@ -457,6 +492,15 @@ def popen_launch_server( start_time = time.perf_counter() with requests.Session() as session: while time.perf_counter() - start_time < timeout: + + return_code = process.poll() + if return_code is not None: + # Server failed to start (non-zero exit code) or crashed + raise Exception( + f"Server process exited with code {return_code}. " + "Check server logs for errors." 
+ ) + try: + headers = { + "Content-Type": "application/json; charset=utf-8", @@ -627,6 +671,7 @@ def get_benchmark_args( disable_stream=False, disable_ignore_eos=False, seed: int = 0, + device="auto", pd_separated: bool = False, ): return SimpleNamespace( @@ -657,6 +702,7 @@ def get_benchmark_args( profile=None, lora_name=None, prompt_suffix="", + device=device, pd_separated=pd_separated, ) @@ -676,7 +722,10 @@ def run_bench_serving( disable_ignore_eos=False, need_warmup=False, seed: int = 0, + device="auto", ): + if device == "auto": + device = auto_config_device() # Launch the server base_url = DEFAULT_URL_FOR_TEST process = popen_launch_server( @@ -700,6 +749,7 @@ def run_bench_serving( disable_stream=disable_stream, disable_ignore_eos=disable_ignore_eos, seed=seed, + device=device, ) try: @@ -750,6 +800,18 @@ def run_bench_serving_multi( def run_bench_one_batch(model, other_args): + """Launch an offline process with automatic device detection. + + The device ("cuda", "rocm" or "cpu") is auto-detected and passed to + the benchmark as "--device <name>"; the caller's other_args list + is left unmodified. + """ + # Auto-detect device; copy other_args so the caller's list is not mutated + + device = auto_config_device() + print(f"Auto-configed device: {device}", flush=True) + other_args = list(other_args) + ["--device", str(device)] + command = [ "python3", "-m", diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 83fde313f..4be6c9f71 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -127,6 +127,16 @@ suites = { "per-commit-8-gpu-amd": [ TestFile("test_full_deepseek_v3.py", 250), ], + "per-commit-cpu": [ + TestFile("cpu/test_activation.py"), + TestFile("cpu/test_decode.py"), + TestFile("cpu/test_extend.py"), + TestFile("cpu/test_gemm.py"), + TestFile("cpu/test_moe.py"), + TestFile("cpu/test_norm.py"), + TestFile("cpu/test_qkv_proj_with_rope.py"), + TestFile("cpu/test_shared_expert.py"), + ], "nightly": [ TestFile("test_nightly_gsm8k_eval.py"), ],