Use only one GPU for MLA CI tests (#2858)

2025-01-13 03:55:33 -08:00
parent 4536d72446
commit 67008f4b32
4 changed files with 39 additions and 8 deletions
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -87,18 +87,16 @@ jobs:
        run: |
          bash scripts/ci_install_dependency.sh

-      - name: Evaluate data parallelism accuracy (DP=2)
+      - name: Test data parallelism (DP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 test_data_parallelism.py

-      - name: Evaluate MLA accuracy (TP=2)
+      - name: Test data parallelism attention (DP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
-          python3 test_mla.py
-          python3 test_mla_fp8.py
          python3 test_dp_attention.py

      - name: Test update weights from distributed
@@ -107,7 +105,7 @@ jobs:
          cd test/srt
          python3 test_update_weights_from_distributed.py

-      - name: Evaluate MoE EP accuracy (TP=2)
+      - name: Test expert parallelism (EP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -22,6 +22,8 @@ suites = {
        "test_json_constrained.py",
        "test_large_max_new_tokens.py",
        "test_metrics.py",
+        "test_mla.py",
+        "test_mla_fp8.py",
        "test_no_chunked_prefill.py",
        "test_no_overlap_scheduler.py",
        "test_openai_server.py",
--- a/test/srt/test_mla.py
+++ b/test/srt/test_mla.py
@@ -2,6 +2,7 @@ import unittest
 from types import SimpleNamespace

 from sglang.srt.utils import kill_process_tree
+from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
@@ -20,7 +21,7 @@ class TestMLA(unittest.TestCase):
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=["--tp", "2", "--trust-remote-code"],
+            other_args=["--trust-remote-code"],
        )

    @classmethod
@@ -52,5 +53,37 @@ class TestMLA(unittest.TestCase):
        self.assertGreater(metrics["score"], 0.8)


+class TestDeepseekV3(unittest.TestCase):  # few-shot GSM8K accuracy check; server is launched without --tp
+    @classmethod
+    def setUpClass(cls):  # launch one server shared by every test in this class
+        cls.model = "lmzheng/sglang-ci-dsv3-test"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=["--trust-remote-code"],
+        )
+
+    @classmethod
+    def tearDownClass(cls):  # stop the server process tree started in setUpClass
+        kill_process_tree(cls.process.pid)
+
+    def test_gsm8k(self):  # named after the eval it actually runs (few_shot_gsm8k), not MMLU
+        args = SimpleNamespace(
+            num_shots=5,
+            data_path=None,
+            num_questions=200,
+            max_new_tokens=512,
+            parallel=128,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),  # port = text after the last ":" in base_url
+        )
+        metrics = run_eval_few_shot_gsm8k(args)
+        print(metrics)
+
+        self.assertGreater(metrics["accuracy"], 0.62)  # fail unless accuracy is above 0.62
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/test/srt/test_mla_fp8.py
+++ b/test/srt/test_mla_fp8.py
@@ -21,8 +21,6 @@ class TestMLA(unittest.TestCase):
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=[
-                "--tp",
-                "2",
                "--trust-remote-code",
                "--kv-cache-dtype",
                "fp8_e5m2",