From 9f1f699a7a3830215f4d5f6da654b4532b4f39f0 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Mon, 13 Oct 2025 11:41:02 -0700 Subject: [PATCH] [CI] Add Basic Test for DeepSeek V3.2 (#11308) --- .github/workflows/pr-test.yml | 27 ++++++++++ scripts/ci/ci_install_dependency.sh | 27 +++++++++- test/srt/run_suite.py | 3 ++ test/srt/test_deepseek_v32_basic.py | 78 +++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 test/srt/test_deepseek_v32_basic.py diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index d66d6f6c5..277f9a281 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -664,6 +664,33 @@ jobs: cd test/srt python3 run_suite.py --suite per-commit-8-gpu-deepep + unit-test-backend-8-gpu-deepseek-v32: + needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] + if: always() && !failure() && !cancelled() && + ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) + runs-on: 8-gpu-h200 + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Download artifacts + if: needs.check-changes.outputs.sgl_kernel == 'true' + uses: actions/download-artifact@v4 + with: + path: sgl-kernel/dist/ + merge-multiple: true + pattern: wheel-python3.10-cuda12.9 + + - name: Install dependencies + run: | + CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} RUN_DEEPSEEK_V32=1 bash scripts/ci/ci_install_dependency.sh + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite per-commit-8-gpu-deepseek-v32 + unit-test-backend-4-gpu-b200: needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] if: always() && !failure() && !cancelled() && diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh index fb449d282..8aba6318e 100755 --- a/scripts/ci/ci_install_dependency.sh +++ 
b/scripts/ci/ci_install_dependency.sh @@ -3,6 +3,7 @@ set -euxo pipefail IS_BLACKWELL=${IS_BLACKWELL:-0} +RUN_DEEPSEEK_V32=${RUN_DEEPSEEK_V32:-0} CU_VERSION="cu128" # Kill existing processes @@ -68,7 +69,31 @@ if [ "$IS_BLACKWELL" != "1" ]; then $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX # Install xformers - $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX --force-reinstall + $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX +fi + +# Install dependencies for deepseek-v3.2 +if [ "$RUN_DEEPSEEK_V32" = "1" ]; then + # Install flashmla + FLASHMLA_COMMIT="1408756a88e52a25196b759eaf8db89d2b51b5a1" + FLASH_MLA_DISABLE_SM100="0" + if [ "$IS_BLACKWELL" != "1" ]; then + FLASH_MLA_DISABLE_SM100="1" + fi + git clone https://github.com/deepseek-ai/FlashMLA.git flash-mla + cd flash-mla + git checkout ${FLASHMLA_COMMIT} + git submodule update --init --recursive + FLASH_MLA_DISABLE_SM100=${FLASH_MLA_DISABLE_SM100} $PIP_CMD install -v . $PIP_INSTALL_SUFFIX --no-build-isolation + cd .. + + # Install fast-hadamard-transform + FAST_HADAMARD_TRANSFORM_COMMIT="7fd811c2b47f63b0b08d2582619f939e14dad77c" + git clone https://github.com/Dao-AILab/fast-hadamard-transform + cd fast-hadamard-transform + git checkout ${FAST_HADAMARD_TRANSFORM_COMMIT} + $PIP_CMD install . $PIP_INSTALL_SUFFIX --no-build-isolation + cd .. 
fi # Show current packages diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index c65362594..6cf36a006 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -170,6 +170,9 @@ suites = { "per-commit-8-gpu-deepep": [ TestFile("ep/test_deepep_large.py", 338), ], + "per-commit-8-gpu-deepseek-v32": [ + TestFile("test_deepseek_v32_basic.py", 275), + ], "per-commit-8-gpu-h20": [ TestFile("quant/test_w4a8_deepseek_v3.py", 371), ], diff --git a/test/srt/test_deepseek_v32_basic.py b/test/srt/test_deepseek_v32_basic.py new file mode 100644 index 000000000..eac6c27bf --- /dev/null +++ b/test/srt/test_deepseek_v32_basic.py @@ -0,0 +1,78 @@ +import unittest +from types import SimpleNamespace + +from sglang.srt.utils import kill_process_tree +from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k +from sglang.test.send_one import BenchArgs, send_one_prompt +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + is_in_ci, + popen_launch_server, + write_github_step_summary, +) + +DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2-Exp" + + +class TestDeepseekV3Basic(CustomTestCase): + @classmethod + def setUpClass(cls): + cls.model = DEEPSEEK_V32_MODEL_PATH + cls.base_url = DEFAULT_URL_FOR_TEST + other_args = [ + "--trust-remote-code", + "--tp", + "8", + "--dp", + "8", + "--enable-dp-attention", + ] + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_a_gsm8k( + self, + ): # Append an "a" to make this test run first (alphabetically) to warm up the server + args = SimpleNamespace( + num_shots=8, + data_path=None, + num_questions=1400, + parallel=1400, + max_new_tokens=512, + host="http://127.0.0.1", + port=int(self.base_url.split(":")[-1]), + ) + metrics = run_eval_few_shot_gsm8k(args) + 
print(f"{metrics=}") + + if is_in_ci(): + write_github_step_summary( + f"### test_gsm8k (deepseek-v3.2)\n" f'{metrics["accuracy"]=:.3f}\n' + ) + self.assertGreater(metrics["accuracy"], 0.935) + + def test_bs_1_speed(self): + args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048) + acc_length, speed = send_one_prompt(args) + + print(f"{speed=:.2f}") + + if is_in_ci(): + write_github_step_summary( + f"### test_bs_1_speed (deepseek-v3.2)\n" f"{speed=:.2f} token/s\n" + ) + self.assertGreater(speed, 50) + + +if __name__ == "__main__": + unittest.main()