From 7f875f1293aa4dab646e312b1e67edda372102c7 Mon Sep 17 00:00:00 2001
From: saienduri
Date: Wed, 9 Apr 2025 11:09:47 -0700
Subject: [PATCH] update grok test (#5171)

---
Reviewer notes (free text after the separator; not part of the commit message):
* The dummy Grok config is now downloaded as dummy-grok/config.json and the
  directory is copied to the container root, so it ends up at /dummy-grok
  (the path the new test passes via --model).
* The bench_one_batch Grok run is removed from the "Evaluate Accuracy" step
  and replaced by a new bench-test-2-gpu-amd job that runs
  models/test_dummy_grok_models.py.
* run_bench_one_batch() appends --model-path only when `model` is not None,
  so a caller can pass None and supply its own --model through other_args.

 .github/workflows/pr-test-amd.yml         | 47 +++++++++++++++++++++--
 python/sglang/test/test_utils.py          |  4 +-
 test/srt/models/test_dummy_grok_models.py | 33 ++++++++++++++++
 3 files changed, 79 insertions(+), 5 deletions(-)
 create mode 100644 test/srt/models/test_dummy_grok_models.py

diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml
index 9955a2d76..1c52f0c7b 100644
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -56,8 +56,8 @@ jobs:
           docker exec -w /human-eval ci_sglang pip install -e .

           docker exec -w / ci_sglang mkdir -p /dummy-grok
-          mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -P dummy-grok
-          docker cp ./dummy-grok ci_sglang:/dummy-grok/
+          mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
+          docker cp ./dummy-grok ci_sglang:/

       - name: Evaluate Accuracy
         timeout-minutes: 20
@@ -65,7 +65,6 @@ jobs:
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 test_eval_accuracy_large.py
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 test_eval_fp8_accuracy.py
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 models/test_qwen_models.py
-          docker exec -w /sglang-checkout -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m sglang.bench_one_batch --batch-size 32 --input 1024 --output 8 --model /dummy-grok --tokenizer-path Xenova/grok-1-tokenizer --load-format dummy --tp 8 --quantization fp8

   mla-test-1-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -105,6 +104,48 @@ jobs:
         run: |
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 test_mla.py

+  bench-test-2-gpu-amd:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+        github.event.pull_request.draft == false
+    runs-on: linux-mi300-gpu-2
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup docker
+        run: |
+          # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG.
+          if [ -f "/etc/podinfo/gha-render-devices" ]; then
+            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
+          else
+            DEVICE_FLAG="--device /dev/dri"
+          fi
+          docker pull lmsysorg/sglang:v0.4.5-rocm630
+          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+            -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
+            --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
+            -w /sglang-checkout --name ci_sglang \
+            lmsysorg/sglang:v0.4.5-rocm630
+
+      - name: Install dependencies
+        run: |
+          docker exec ci_sglang pip install --upgrade pip
+          docker exec ci_sglang pip uninstall sgl-kernel -y || true
+          docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
+          docker exec ci_sglang pip install -e "python[dev_hip]"
+
+          docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
+          docker exec -w /human-eval ci_sglang pip install -e .
+
+          docker exec -w / ci_sglang mkdir -p /dummy-grok
+          mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
+          docker cp ./dummy-grok ci_sglang:/
+
+      - name: Evaluate Benchmark
+        timeout-minutes: 20
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 models/test_dummy_grok_models.py
+
   finish:
     if: always()
     needs: [
diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py
index 7d68dcf37..e60afb841 100644
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -669,8 +669,6 @@ def run_bench_one_batch(model, other_args):
         "python3",
         "-m",
         "sglang.bench_one_batch",
-        "--model-path",
-        model,
         "--batch-size",
         "1",
         "--input",
@@ -679,6 +677,8 @@ def run_bench_one_batch(model, other_args):
         "8",
         *[str(x) for x in other_args],
     ]
+    if model is not None:
+        command += ["--model-path", model]
     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

     try:
diff --git a/test/srt/models/test_dummy_grok_models.py b/test/srt/models/test_dummy_grok_models.py
new file mode 100644
index 000000000..7ffa432b2
--- /dev/null
+++ b/test/srt/models/test_dummy_grok_models.py
@@ -0,0 +1,33 @@
+import unittest
+
+from sglang.test.test_utils import CustomTestCase, is_in_ci, run_bench_one_batch
+
+
+class TestDummyGrok1(CustomTestCase):
+    def test_dummy_grok_1(self):
+        output_throughput = run_bench_one_batch(
+            None,
+            [
+                "--model",
+                "/dummy-grok",
+                "--tokenizer-path",
+                "Xenova/grok-1-tokenizer",
+                "--batch-size",
+                "2",
+                "--tp",
+                "2",
+                "--quantization",
+                "fp8",
+                "--load-format",
+                "dummy",
+                "--json-model-override-args",
+                '{"num_hidden_layers": 2}',
+            ],
+        )
+
+        if is_in_ci():
+            assert output_throughput > 0, f"{output_throughput=}"
+
+
+if __name__ == "__main__":
+    unittest.main()