From fe6cdf8972368d72fe79878e38ba310ef2140163 Mon Sep 17 00:00:00 2001
From: Yi Zhang <1109276519@qq.com>
Date: Fri, 12 Sep 2025 18:06:48 +0800
Subject: [PATCH] add qwen3-next ut (#10355)

---
Reviewer notes (text between the "---" marker and the first "diff --git"
line is not part of the commit):

* The patch text had lost its line breaks; they are restored here. Every
  token of the payload is unchanged, so the stat lines, hunk counts and
  blob ids in this patch still describe it. Leading whitespace was
  rebuilt from 4-space Python indentation -- confirm by checking that the
  new file hashes to blob 808da9a71 and that the run_suite.py context
  matches the target tree.
* New file test/srt/models/test_qwen3_next_models.py defines two
  CustomTestCase classes, TestQwen3Next and TestQwen3NextMTP. Each one
  starts a server for "Qwen/Qwen3-Next-80B-A3B-Instruct" with
  popen_launch_server in setUpClass, kills its process tree in
  tearDownClass, and in test_gsm8k calls run_eval (5 shots, 200
  questions, 512 new tokens, parallel 128) and asserts that
  metrics["accuracy"] is greater than 0.93.
* TestQwen3Next passes only "--tp-size 4". TestQwen3NextMTP passes
  "--trust-remote-code", the NEXTN speculative options (num-steps 1,
  eagle-topk 1, num-draft-tokens 2), "--mem-fraction-static 0.8" and
  "--tp 4".
* test/srt/run_suite.py gains
  TestFile("models/test_qwen3_next_models.py", 200) between the
  test_pp_single_node.py and
  test_multi_instance_release_memory_occupation.py entries. The suite key
  is outside the hunk; the second argument is presumably a runtime
  estimate -- verify against TestFile.
* Follow-up candidates, deliberately not applied here: the two classes
  spell the tensor-parallel flag differently ("--tp-size" vs "--tp"), and
  test_gsm8k is duplicated verbatim in both classes.

 test/srt/models/test_qwen3_next_models.py | 94 +++++++++++++++++++++++
 test/srt/run_suite.py                     |  1 +
 2 files changed, 95 insertions(+)
 create mode 100644 test/srt/models/test_qwen3_next_models.py

diff --git a/test/srt/models/test_qwen3_next_models.py b/test/srt/models/test_qwen3_next_models.py
new file mode 100644
index 000000000..808da9a71
--- /dev/null
+++ b/test/srt/models/test_qwen3_next_models.py
@@ -0,0 +1,94 @@
+import unittest
+from types import SimpleNamespace
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.few_shot_gsm8k import run_eval
+from sglang.test.test_utils import (
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    popen_launch_server,
+)
+
+
+class TestQwen3Next(CustomTestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = "Qwen/Qwen3-Next-80B-A3B-Instruct"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=[
+                "--tp-size",
+                "4",
+            ],
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_gsm8k(self):
+        args = SimpleNamespace(
+            num_shots=5,
+            data_path=None,
+            num_questions=200,
+            max_new_tokens=512,
+            parallel=128,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),
+        )
+        metrics = run_eval(args)
+        print(f"{metrics=}")
+        self.assertGreater(metrics["accuracy"], 0.93)
+
+
+class TestQwen3NextMTP(CustomTestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = "Qwen/Qwen3-Next-80B-A3B-Instruct"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=[
+                "--trust-remote-code",
+                "--speculative-algorithm",
+                "NEXTN",
+                "--speculative-num-steps",
+                "1",
+                "--speculative-eagle-topk",
+                "1",
+                "--speculative-num-draft-tokens",
+                "2",
+                "--mem-fraction-static",
+                "0.8",
+                "--tp",
+                "4",
+            ],
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_gsm8k(self):
+        args = SimpleNamespace(
+            num_shots=5,
+            data_path=None,
+            num_questions=200,
+            max_new_tokens=512,
+            parallel=128,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),
+        )
+        metrics = run_eval(args)
+        print(f"{metrics=}")
+        self.assertGreater(metrics["accuracy"], 0.93)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index bb881bf8c..bfb30b891 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -133,6 +133,7 @@ suites = {
         TestFile("test_gpt_oss_4gpu.py", 600),
         TestFile("test_local_attn.py", 250),
         TestFile("test_pp_single_node.py", 372),
+        TestFile("models/test_qwen3_next_models.py", 200),
         TestFile("test_multi_instance_release_memory_occupation.py", 64),
     ],
     "per-commit-8-gpu": [