Add fp8 gemm kernel for CPU in sgl-kernel and add gemm UT (#6216)

Co-authored-by: YanbingJiang <yanbing.jiang@intel.com>
Co-authored-by: mingfeima <mingfei.ma@intel.com>
This commit is contained in:
Chunyuan WU
2025-05-16 00:10:40 +08:00
committed by GitHub
parent 9a405274e2
commit fb4959b2c5
9 changed files with 921 additions and 2 deletions

View File

@@ -424,7 +424,8 @@ at::Tensor convert_weight_packed(at::Tensor& weight) {
const int64_t stride = OC * IC;
TORCH_CHECK(
-    st == at::kBFloat16 || st == at::kHalf || st == at::kChar, "expect weight to be bfloat16, float16 or int8.");
+    st == at::kBFloat16 || st == at::kHalf || st == at::kChar || st == at::kFloat8_e4m3fn,
+    "expect weight to be bfloat16, float16, int8 or fp8_e4m3.");
CPU_DISPATCH_PACKED_TYPES(st, [&] {
// adjust most inner dimension size