Add fp8 gemm kernel for CPU in sgl-kernel and add gemm UT (#6216)

Co-authored-by: YanbingJiang <yanbing.jiang@intel.com>
Co-authored-by: mingfeima <mingfei.ma@intel.com>
This commit is contained in:
Chunyuan WU
2025-05-16 00:10:40 +08:00
committed by GitHub
parent 9a405274e2
commit fb4959b2c5
9 changed files with 921 additions and 2 deletions

View File

@@ -47,6 +47,8 @@ def _get_version():
return line.split("=")[1].strip().strip('"')
# Build-time switch taken from the SGLANG_CPU_FP8_CVT_FTZ environment variable.
# It is on when the variable is unset (the default is "1") and only the exact
# string "1" enables it; any other value turns it off.
cpu_fp8_ftz = os.getenv("SGLANG_CPU_FP8_CVT_FTZ", "1") == "1"
operator_namespace = "sgl_kernel"
include_dirs = []
@@ -56,6 +58,7 @@ sources = [
"csrc/cpu/decode.cpp",
"csrc/cpu/extend.cpp",
"csrc/cpu/gemm.cpp",
"csrc/cpu/gemm_fp8.cpp",
"csrc/cpu/gemm_int8.cpp",
"csrc/cpu/moe.cpp",
"csrc/cpu/moe_int8.cpp",
@@ -76,6 +79,9 @@ extra_compile_args = {
"-fopenmp",
]
}
# When the switch is on, pass -DSGLANG_CPU_FP8_CVT_FTZ in the "cxx" compile
# arguments so the macro is defined for the compiled sources.
# NOTE(review): presumably this selects flush-to-zero behavior in the FP8
# conversion code — confirm against the C++ sources.
if cpu_fp8_ftz:
extra_compile_args["cxx"].append("-DSGLANG_CPU_FP8_CVT_FTZ")
libraries = ["c10", "torch", "torch_python"]
cmdclass = {
"build_ext": BuildExtension.with_options(use_ninja=True),