diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index acc8a6bb9..3b58cde5d 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -296,6 +296,7 @@ jobs: fail-fast: false matrix: runner: [linux-mi300-gpu-8] + part: [0, 1] runs-on: ${{matrix.runner}} steps: - name: Checkout code @@ -315,7 +316,7 @@ jobs: - name: Run test timeout-minutes: 60 run: | - bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600 + bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 3600 unit-test-sgl-kernel-amd: if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 4660e34ad..cab0b527d 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -152,7 +152,8 @@ suites = { TestFile("test_disaggregation_dp_attention.py", 155), TestFile("test_disaggregation_different_tp.py", 600), TestFile("test_disaggregation_pp.py", 140), - TestFile("test_full_deepseek_v3.py", 550), + TestFile("test_deepseek_v3_basic.py", 275), + TestFile("test_deepseek_v3_mtp.py", 275), ], "per-commit-4-gpu-b200": [ # TestFile("test_gpt_oss_4gpu.py", 600), @@ -267,7 +268,8 @@ suite_amd = { TestFile("test_pp_single_node.py", 150), ], "per-commit-8-gpu-amd": [ - TestFile("test_full_deepseek_v3.py", 250), + TestFile("test_deepseek_v3_basic.py", 275), + TestFile("test_deepseek_v3_mtp.py", 275), ], "nightly-amd": [ TestFile("test_nightly_gsm8k_eval_amd.py"), @@ -369,7 +371,7 @@ if __name__ == "__main__": arg_parser.add_argument( "--timeout-per-file", type=int, - default=1500 if is_hip() else 1200, + default=1200, help="The time limit for running one file in seconds.", ) arg_parser.add_argument( diff --git a/test/srt/test_deepseek_v3_basic.py b/test/srt/test_deepseek_v3_basic.py new file 
mode 100644 index 000000000..349c102c5 --- /dev/null +++ b/test/srt/test_deepseek_v3_basic.py @@ -0,0 +1,77 @@ +import unittest +from types import SimpleNamespace + +import requests + +from sglang.srt.utils import kill_process_tree +from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k +from sglang.test.send_one import BenchArgs, send_one_prompt +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + is_in_amd_ci, + is_in_ci, + popen_launch_server, + write_github_step_summary, +) + +FULL_DEEPSEEK_V3_MODEL_PATH = "deepseek-ai/DeepSeek-V3-0324" + + +class TestDeepseekV3Basic(CustomTestCase): + @classmethod + def setUpClass(cls): + cls.model = FULL_DEEPSEEK_V3_MODEL_PATH + cls.base_url = DEFAULT_URL_FOR_TEST + other_args = ["--trust-remote-code", "--tp", "8"] + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_a_gsm8k( + self, + ): # The "a" in the name makes this test sort first (alphabetically), so it runs first and warms up the server + args = SimpleNamespace( + num_shots=8, + data_path=None, + num_questions=1400, + parallel=1400, + max_new_tokens=512, + host="http://127.0.0.1", + port=int(self.base_url.split(":")[-1]), + ) + metrics = run_eval_few_shot_gsm8k(args) + print(f"{metrics=}") + + if is_in_ci(): + write_github_step_summary( + f"### test_gsm8k (deepseek-v3)\n" f'{metrics["accuracy"]=:.3f}\n' + ) + self.assertGreater(metrics["accuracy"], 0.935) + + def test_bs_1_speed(self): + args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048) + acc_length, speed = send_one_prompt(args) + + print(f"{speed=:.2f}") + + if is_in_ci(): + write_github_step_summary( + f"### test_bs_1_speed (deepseek-v3)\n" f"{speed=:.2f} token/s\n" + ) + if is_in_amd_ci(): + self.assertGreater(speed, 12) + else: 
self.assertGreater(speed, 75) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_deepseek_v3_mtp.py similarity index 65% rename from test/srt/test_full_deepseek_v3.py rename to test/srt/test_deepseek_v3_mtp.py index f6a58536a..4dde12a50 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_deepseek_v3_mtp.py @@ -19,60 +19,6 @@ from sglang.test.test_utils import ( FULL_DEEPSEEK_V3_MODEL_PATH = "deepseek-ai/DeepSeek-V3-0324" -class TestDeepseekV3(CustomTestCase): - @classmethod - def setUpClass(cls): - cls.model = FULL_DEEPSEEK_V3_MODEL_PATH - cls.base_url = DEFAULT_URL_FOR_TEST - other_args = ["--trust-remote-code", "--tp", "8"] - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=other_args, - ) - - @classmethod - def tearDownClass(cls): - kill_process_tree(cls.process.pid) - - def test_a_gsm8k( - self, - ): # Append an "a" to make this test run first (alphabetically) to warm up the server - args = SimpleNamespace( - num_shots=8, - data_path=None, - num_questions=1400, - parallel=1400, - max_new_tokens=512, - host="http://127.0.0.1", - port=int(self.base_url.split(":")[-1]), - ) - metrics = run_eval_few_shot_gsm8k(args) - print(f"{metrics=}") - - if is_in_ci(): - write_github_step_summary( - f"### test_gsm8k (deepseek-v3)\n" f'{metrics["accuracy"]=:.3f}\n' - ) - self.assertGreater(metrics["accuracy"], 0.935) - - def test_bs_1_speed(self): - args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048) - acc_length, speed = send_one_prompt(args) - - print(f"{speed=:.2f}") - - if is_in_ci(): - write_github_step_summary( - f"### test_bs_1_speed (deepseek-v3)\n" f"{speed=:.2f} token/s\n" - ) - if is_in_amd_ci(): - self.assertGreater(speed, 12) - else: - self.assertGreater(speed, 75) - - class TestDeepseekV3MTP(CustomTestCase): @classmethod def setUpClass(cls):