Add FP8 GEMM kernel for CPU in sgl-kernel and add GEMM unit tests (#6216)

Co-authored-by: YanbingJiang <yanbing.jiang@intel.com>
Co-authored-by: mingfeima <mingfei.ma@intel.com>
2025-05-16 00:10:40 +08:00
parent 9a405274e2
commit fb4959b2c5
9 changed files with 921 additions and 2 deletions
--- a/sgl-kernel/csrc/cpu/common.h
+++ b/sgl-kernel/csrc/cpu/common.h
@@ -22,7 +22,7 @@ namespace {
    }                                            \
  }()

-// dispatch: bfloat16, float16, int8_t
+// dispatch: bfloat16, float16, int8_t, float8_e4m3fn
 #define CPU_DISPATCH_PACKED_TYPES(TYPE, ...)                     \
  [&] {                                                          \
    switch (TYPE) {                                              \
@@ -38,6 +38,10 @@ namespace {
        using packed_t = int8_t;                                 \
        return __VA_ARGS__();                                    \
      }                                                          \
+      case at::ScalarType::Float8_e4m3fn: {                      \
+        using packed_t = at::Float8_e4m3fn;                      \
+        return __VA_ARGS__();                                    \
+      }                                                          \
      default:                                                   \
        TORCH_CHECK(false, "Unsupported floating data type.\n"); \
    }                                                            \