Fix torch compile for deepseek-v2 (#1442)

2024-09-17 15:52:08 +08:00
parent 3a6e04185b
commit 76524b70d1
3 changed files with 20 additions and 1 deletion
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -110,6 +110,7 @@ class ServerArgs:
    disable_custom_all_reduce: bool = False
    enable_mixed_chunk: bool = False
    enable_torch_compile: bool = False
+    max_torch_compile_bs: int = 32
    torchao_config: str = ""
    enable_p2p_check: bool = False
    enable_mla: bool = False
@@ -523,6 +524,12 @@ class ServerArgs:
            action="store_true",
            help="Optimize the model with torch.compile. Experimental feature.",
        )
+        parser.add_argument(
+            "--max-torch-compile-bs",
+            type=int,
+            default=ServerArgs.max_torch_compile_bs,
+            help="Set the maximum batch size when using torch compile.",
+        )
        parser.add_argument(
            "--torchao-config",
            type=str,