Apply sgl w8a8 fp8 kernel (#3148)

2025-03-09 16:03:32 +08:00
parent 9fb48f951f
commit 0dd6cda288
13 changed files with 523 additions and 37 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -405,6 +405,7 @@ class ServerArgs:
                "gguf",
                "modelopt",
                "w8a8_int8",
+                "w8a8_fp8",
            ],
            help="The quantization method.",
        )