Add fp8 shared_expert kernel for CPU in sgl-kernel and add UT (#6339)

Co-authored-by: Jiang, Yanbing <yanbing.jiang@intel.com> Co-authored-by: mingfeima <mingfei.ma@intel.com>
2025-05-19 03:42:15 +08:00
parent f11481b921
commit 5dd62c3a6f
8 changed files with 603 additions and 32 deletions
--- a/sgl-kernel/csrc/cpu/torch_extension_cpu.cpp
+++ b/sgl-kernel/csrc/cpu/torch_extension_cpu.cpp
@@ -139,8 +139,10 @@ at::Tensor shared_expert_cpu(
    double routed_scaling_factor,
    bool inplace,
    bool use_int8_w8a8,
+    bool use_fp8_w8a16,
    std::optional<at::Tensor>& w1_scale,
    std::optional<at::Tensor>& w2_scale,
+    std::optional<std::vector<int64_t>> block_size,
    std::optional<at::Tensor>& a1_scale,
    std::optional<at::Tensor>& a2_scale,
    bool is_vnni);