Support w8a8 int8 quantization config (#2881)

This commit is contained in:
Ke Bao
2025-01-14 17:07:49 +08:00
committed by GitHub
parent b8cd09f27a
commit cc0485bef2
4 changed files with 135 additions and 6 deletions

@@ -378,6 +378,7 @@ class ServerArgs:
"bitsandbytes",
"gguf",
"modelopt",
"w8a8_int8",
],
help="The quantization method.",
)
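The hunk above extends the `choices` list of the `--quantization` server flag with the new `"w8a8_int8"` method. A minimal, self-contained sketch of how such a flag behaves (assumed argparse usage; only the choices visible in this hunk are listed, the diff truncates the rest):

```python
import argparse

# Hypothetical reconstruction of the ServerArgs CLI flag from the diff.
# The full choices list is truncated in the hunk; only visible entries
# are included here.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--quantization",
    type=str,
    default=None,
    choices=[
        "bitsandbytes",
        "gguf",
        "modelopt",
        "w8a8_int8",  # the option added by this commit
    ],
    help="The quantization method.",
)

# With this change, the server accepts the new method on the command line.
args = parser.parse_args(["--quantization", "w8a8_int8"])
print(args.quantization)
```

Passing any string not in `choices` (e.g. `--quantization w4a16`) would make argparse exit with an "invalid choice" error, which is why new quantization backends must be registered in this list.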