enable auto-round quantization model (#6226)

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
2025-09-08 13:05:35 +08:00
parent b67c277f86
commit c8295d2353
8 changed files with 528 additions and 0 deletions
--- a/test/srt/quant/test_autoround.py
+++ b/test/srt/quant/test_autoround.py
@@ -0,0 +1,62 @@
+"""
+Usage:
+python3 -m unittest test_autoround.TestAutoRound.test_mmlu
+"""
+
+import unittest
+from types import SimpleNamespace
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.run_eval import run_eval
+from sglang.test.test_utils import (
+    DEFAULT_AUTOROUND_MODEL_NAME_FOR_TEST,
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    popen_launch_server,
+)
+
+
+class TestAutoRound(CustomTestCase):
+    """MMLU accuracy check for every model in the AutoRound test list."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.base_url = DEFAULT_URL_FOR_TEST
+
+    @classmethod
+    def tearDownClass(cls):
+        pass  # nothing to release here: test_mmlu stops each server it starts
+
+    def test_mmlu(self):
+        # Each model gets its own server, which is killed before the next launch.
+        device = "auto"
+        for model in DEFAULT_AUTOROUND_MODEL_NAME_FOR_TEST:
+            with self.subTest(model=model):
+                print(f"\n[INFO] Launching server for model: {model}")
+                server = popen_launch_server(
+                    model,
+                    self.base_url,
+                    timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+                    other_args=["--trust-remote-code", "--quantization", "auto-round"],
+                    device=device,
+                )
+                try:
+                    eval_args = SimpleNamespace(
+                        base_url=self.base_url,
+                        model=model,
+                        eval_name="mmlu",
+                        num_examples=64,
+                        num_threads=32,
+                        device=device,
+                    )
+                    # Models with "Llama" in the name are held to a higher floor.
+                    min_score = 0.6 if "Llama" in model else 0.26
+                    self.assertGreaterEqual(run_eval(eval_args)["score"], min_score)
+                finally:
+                    kill_process_tree(server.pid)
+                    print(f"[INFO] Server for {model} stopped.")
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when the file is executed directly