support Llama4 with non uniformed intermediate size across layers for… (#10047)

This commit is contained in:
gongwei-130
2025-09-05 17:28:15 -07:00
committed by GitHub
parent 273b28344b
commit ab62b135c1
7 changed files with 123 additions and 13 deletions

View File

@@ -0,0 +1,61 @@
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
# Models exercised by the bring-up test. Each entry carries the HF model id
# and the tensor-parallel degree passed to the server via --tp-size.
MODELS = [
    SimpleNamespace(
        model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        tp_size=8,
    ),
]
class TestLlama4LoRA(CustomTestCase):
    """Smoke test: launch each model in MODELS with LoRA enabled.

    The test passes if the server process for every model comes up within the
    launch timeout; any launch exception fails the test. The server process is
    always torn down in the ``finally`` block, success or failure.
    """

    @classmethod
    def setUpClass(cls):
        cls.base_url = DEFAULT_URL_FOR_TEST

    def test_bringup(self):
        for model in MODELS:
            # Bind before the try block: if popen_launch_server raises, the
            # finally clause would otherwise hit an UnboundLocalError on the
            # first iteration (or clean up a stale process from a previous
            # iteration) instead of reporting the real launch failure.
            process = None
            try:
                process = popen_launch_server(
                    model.model,
                    self.base_url,
                    timeout=3 * DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                    other_args=[
                        "--enable-lora",
                        "--max-lora-rank",
                        "64",
                        "--lora-target-modules",
                        "all",
                        "--tp-size",
                        str(model.tp_size),
                        "--context-length",
                        "1048576",
                        "--attention-backend",
                        "fa3",
                    ],
                )
            except Exception as e:
                print(f"Error testing {model.model}: {e}")
                self.fail(f"Test failed for {model.model}: {e}")
            finally:
                # Ensure process cleanup happens regardless of success/failure
                if process is not None and process.poll() is None:
                    print(f"Cleaning up process {process.pid}")
                    try:
                        kill_process_tree(process.pid)
                    except Exception as e:
                        print(f"Error killing process: {e}")
# Allow running this file directly (outside the CI test-suite runner).
if __name__ == "__main__":
    unittest.main()

View File

@@ -136,6 +136,7 @@ suites = {
"per-commit-8-gpu": [
# Disabled because it hangs on the CI.
# TestFile("ep/test_moe_ep.py", 181),
TestFile("lora/test_lora_llama4.py", 600),
TestFile("test_disaggregation.py", 499),
TestFile("test_disaggregation_different_tp.py", 155),
TestFile("test_full_deepseek_v3.py", 333),