From 9f1f699a7a3830215f4d5f6da654b4532b4f39f0 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Mon, 13 Oct 2025 11:41:02 -0700 Subject: [PATCH] [CI] Add Basic Test for DeepSeek V3.2 (#11308) --- .github/workflows/pr-test.yml | 27 ++++++++++ scripts/ci/ci_install_dependency.sh | 27 +++++++++- test/srt/run_suite.py | 3 ++ test/srt/test_deepseek_v32_basic.py | 78 +++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 test/srt/test_deepseek_v32_basic.py diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index d66d6f6c5..277f9a281 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -664,6 +664,33 @@ jobs: cd test/srt python3 run_suite.py --suite per-commit-8-gpu-deepep + unit-test-backend-8-gpu-deepseek-v32: + needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] + if: always() && !failure() && !cancelled() && + ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) + runs-on: 8-gpu-h200 + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Download artifacts + if: needs.check-changes.outputs.sgl_kernel == 'true' + uses: actions/download-artifact@v4 + with: + path: sgl-kernel/dist/ + merge-multiple: true + pattern: wheel-python3.10-cuda12.9 + + - name: Install dependencies + run: | + CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} RUN_DEEPSEEK_V32=1 bash scripts/ci/ci_install_dependency.sh + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite per-commit-8-gpu-deepseek-v32 + unit-test-backend-4-gpu-b200: needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] if: always() && !failure() && !cancelled() && diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh index fb449d282..8aba6318e 100755 --- a/scripts/ci/ci_install_dependency.sh +++ 
b/scripts/ci/ci_install_dependency.sh @@ -3,6 +3,7 @@ set -euxo pipefail IS_BLACKWELL=${IS_BLACKWELL:-0} +RUN_DEEPSEEK_V32=${RUN_DEEPSEEK_V32:-0} CU_VERSION="cu128" # Kill existing processes @@ -68,7 +69,31 @@ if [ "$IS_BLACKWELL" != "1" ]; then $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX # Install xformers - $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX --force-reinstall + $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX +fi + +# Install dependencies for deepseek-v3.2 +if [ "$RUN_DEEPSEEK_V32" = "1" ]; then + # Install flashmla + FLASHMLA_COMMIT="1408756a88e52a25196b759eaf8db89d2b51b5a1" + FLASH_MLA_DISABLE_SM100="0" + if [ "$IS_BLACKWELL" != "1" ]; then + FLASH_MLA_DISABLE_SM100="1" + fi + git clone https://github.com/deepseek-ai/FlashMLA.git flash-mla + cd flash-mla + git checkout ${FLASHMLA_COMMIT} + git submodule update --init --recursive + FLASH_MLA_DISABLE_SM100=${FLASH_MLA_DISABLE_SM100} $PIP_CMD install -v . $PIP_INSTALL_SUFFIX --no-build-isolation + cd .. + + # Install fast-hadamard-transform + FAST_HADAMARD_TRANSFORM_COMMIT="7fd811c2b47f63b0b08d2582619f939e14dad77c" + git clone https://github.com/Dao-AILab/fast-hadamard-transform + cd fast-hadamard-transform + git checkout ${FAST_HADAMARD_TRANSFORM_COMMIT} + $PIP_CMD install . $PIP_INSTALL_SUFFIX --no-build-isolation + cd .. 
fi # Show current packages diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index c65362594..6cf36a006 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -170,6 +170,9 @@ suites = { "per-commit-8-gpu-deepep": [ TestFile("ep/test_deepep_large.py", 338), ], + "per-commit-8-gpu-deepseek-v32": [ + TestFile("test_deepseek_v32_basic.py", 275), + ], "per-commit-8-gpu-h20": [ TestFile("quant/test_w4a8_deepseek_v3.py", 371), ], diff --git a/test/srt/test_deepseek_v32_basic.py b/test/srt/test_deepseek_v32_basic.py new file mode 100644 index 000000000..eac6c27bf --- /dev/null +++ b/test/srt/test_deepseek_v32_basic.py @@ -0,0 +1,78 @@ +import unittest +from types import SimpleNamespace + +from sglang.srt.utils import kill_process_tree +from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k +from sglang.test.send_one import BenchArgs, send_one_prompt +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + is_in_ci, + popen_launch_server, + write_github_step_summary, +) + +DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2-Exp" + + +class TestDeepseekV3Basic(CustomTestCase): + @classmethod + def setUpClass(cls): + cls.model = DEEPSEEK_V32_MODEL_PATH + cls.base_url = DEFAULT_URL_FOR_TEST + other_args = [ + "--trust-remote-code", + "--tp", + "8", + "--dp", + "8", + "--enable-dp-attention", + ] + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_a_gsm8k( + self, + ): # Append an "a" to make this test run first (alphabetically) to warm up the server + args = SimpleNamespace( + num_shots=8, + data_path=None, + num_questions=1400, + parallel=1400, + max_new_tokens=512, + host="http://127.0.0.1", + port=int(self.base_url.split(":")[-1]), + ) + metrics = run_eval_few_shot_gsm8k(args) + 
print(f"{metrics=}") + + if is_in_ci(): + write_github_step_summary( + f"### test_gsm8k (deepseek-v3.2)\n" f'{metrics["accuracy"]=:.3f}\n' + ) + self.assertGreater(metrics["accuracy"], 0.935) + + def test_bs_1_speed(self): + args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048) + acc_length, speed = send_one_prompt(args) + + print(f"{speed=:.2f}") + + if is_in_ci(): + write_github_step_summary( + f"### test_bs_1_speed (deepseek-v3.2)\n" f"{speed=:.2f} token/s\n" + ) + self.assertGreater(speed, 50) + + +if __name__ == "__main__": + unittest.main()