Add fp8 gemm kernel for CPU in sgl-kernel and add gemm UT (#6216)

Co-authored-by: YanbingJiang <yanbing.jiang@intel.com>
Co-authored-by: mingfeima <mingfei.ma@intel.com>
2025-05-16 00:10:40 +08:00
parent 9a405274e2
commit fb4959b2c5
9 changed files with 921 additions and 2 deletions
--- a/sgl-kernel/setup_cpu.py
+++ b/sgl-kernel/setup_cpu.py
@@ -47,6 +47,8 @@ def _get_version():
                return line.split("=")[1].strip().strip('"')


+cpu_fp8_ftz = os.getenv("SGLANG_CPU_FP8_CVT_FTZ", "1") == "1"
+
 operator_namespace = "sgl_kernel"
 include_dirs = []

@@ -56,6 +58,7 @@ sources = [
    "csrc/cpu/decode.cpp",
    "csrc/cpu/extend.cpp",
    "csrc/cpu/gemm.cpp",
+    "csrc/cpu/gemm_fp8.cpp",
    "csrc/cpu/gemm_int8.cpp",
    "csrc/cpu/moe.cpp",
    "csrc/cpu/moe_int8.cpp",
@@ -76,6 +79,9 @@ extra_compile_args = {
        "-fopenmp",
    ]
 }
+if cpu_fp8_ftz:
+    extra_compile_args["cxx"].append("-DSGLANG_CPU_FP8_CVT_FTZ")
+
 libraries = ["c10", "torch", "torch_python"]
 cmdclass = {
    "build_ext": BuildExtension.with_options(use_ninja=True),