diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index 565b02fb5..49ab3eb04 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -56,6 +56,42 @@ jobs: run: | docker exec -w /sglang-checkout/test/srt ci_sglang python3 test_eval_accuracy_large.py + mla-test-1-gpu-amd: + if: github.repository == 'sgl-project/sglang' && github.event.pull_request.draft == false + runs-on: linux-mi300-gpu-1 + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup docker + run: | + # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG. + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + docker pull lmsysorg/sglang:v0.4.2.post2-rocm630 + docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \ + -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \ + --cap-add=SYS_PTRACE -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} --security-opt seccomp=unconfined \ + -w /sglang-checkout --name ci_sglang \ + lmsysorg/sglang:v0.4.2.post2-rocm630 + + - name: Install dependencies + run: | + docker exec ci_sglang pip install --upgrade pip + docker exec -w /sglang-checkout/sgl-kernel ci_sglang python3 setup_rocm.py install + docker exec ci_sglang pip install -e "python[dev_hip]" + + docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git + docker exec -w /human-eval ci_sglang pip install -e . + + - name: MLA TEST + timeout-minutes: 20 + run: | + docker exec -w /sglang-checkout/test/srt ci_sglang python3 test_mla.py + finish: needs: [ accuracy-test-1-gpu-amd diff --git a/test/srt/test_mla.py b/test/srt/test_mla.py index 630573250..0e4d64c9a 100644 --- a/test/srt/test_mla.py +++ b/test/srt/test_mla.py @@ -1,6 +1,8 @@ import unittest from types import SimpleNamespace +import torch + from sglang.srt.utils import kill_process_tree from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k from sglang.test.run_eval import run_eval @@ -58,16 +60,14 @@ class TestDeepseekV3(unittest.TestCase): def setUpClass(cls): cls.model = "lmzheng/sglang-ci-dsv3-test" cls.base_url = DEFAULT_URL_FOR_TEST + other_args = ["--trust-remote-code"] + if torch.cuda.is_available() and torch.version.cuda: + other_args.extend(["--enable-torch-compile", "--cuda-graph-max-bs", "2"]) cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--enable-torch-compile", - "--cuda-graph-max-bs", - "2", - ], + other_args=other_args, ) @classmethod