Add fp8 gemm kernel for CPU in sgl-kernel and add gemm UT (#6216)

Co-authored-by: YanbingJiang <yanbing.jiang@intel.com>
Co-authored-by: mingfeima <mingfei.ma@intel.com>
This commit is contained in:
Chunyuan WU
2025-05-16 00:10:40 +08:00
committed by GitHub
parent 9a405274e2
commit fb4959b2c5
9 changed files with 921 additions and 2 deletions

View File

@@ -33,6 +33,11 @@ inline bool can_use_brgemm<int8_t>(int M) {
return false;
}
// Specialization of can_use_brgemm for at::Float8_e4m3fn.
// Returns true only when M is strictly greater than 4; for M <= 4 it
// returns false.
template <>
inline bool can_use_brgemm<at::Float8_e4m3fn>(int M) {
  // Largest M for which this specialization still reports false.
  constexpr int kLargestMWithoutBrgemm = 4;
  const bool exceeds_threshold = M > kLargestMWithoutBrgemm;
  return exceeds_threshold;
}
// Work around a compiler internal error.
// NOTE(review): presumably the literal value is spelled out here (rather than
// being computed from TILE_K) to avoid that compiler error -- confirm, and keep
// this value in sync with TILE_K if TILE_K ever changes.
#define BLOCK_K 128 // 4 * TILE_K