Add Tensor Parallel to torch_native_llama (#1876)

This commit is contained in:
Ke Wen
2024-11-15 21:26:00 -08:00
committed by GitHub
parent e5c6715003
commit cf2489762b
5 changed files with 246 additions and 82 deletions

24
test/srt/test_torch_tp.py Normal file
View File

@@ -0,0 +1,24 @@
import unittest
from sglang.test.test_utils import is_in_ci, run_bench_latency
class TestTorchTP(unittest.TestCase):
    """Smoke test for the torch-native Llama model under tensor parallelism.

    Launches a latency benchmark with --tp 2 and, when running in CI,
    checks that the measured output throughput is positive.
    """

    def test_torch_native_llama(self):
        """Benchmark TorchNativeLlamaForCausalLM with TP=2 and CUDA graphs disabled."""
        output_throughput = run_bench_latency(
            "meta-llama/Meta-Llama-3-8B",
            [
                "--tp",
                "2",
                "--json-model-override-args",
                '{"architectures": ["TorchNativeLlamaForCausalLM"]}',
                "--disable-cuda-graph",
            ],
        )
        # Only enforce the throughput check in CI; local runs may lack the
        # required hardware (2 GPUs) and just exercise the code path.
        if is_in_ci():
            # Use a unittest assertion instead of a bare `assert`: it survives
            # `python -O` and produces a proper failure report.
            self.assertGreater(output_throughput, 0, f"{output_throughput=}")
if __name__ == "__main__":
unittest.main()