Add fp8 shared_expert kernel for CPU in sgl-kernel and add unit tests (#6339)

Co-authored-by: Jiang, Yanbing <yanbing.jiang@intel.com>
Co-authored-by: mingfeima <mingfei.ma@intel.com>
This commit is contained in:
Chunyuan WU
2025-05-19 03:42:15 +08:00
committed by GitHub
parent f11481b921
commit 5dd62c3a6f
8 changed files with 603 additions and 32 deletions

View File

@@ -139,8 +139,10 @@ at::Tensor shared_expert_cpu(
double routed_scaling_factor,
bool inplace,
bool use_int8_w8a8,
bool use_fp8_w8a16,
std::optional<at::Tensor>& w1_scale,
std::optional<at::Tensor>& w2_scale,
std::optional<std::vector<int64_t>> block_size,
std::optional<at::Tensor>& a1_scale,
std::optional<at::Tensor>& a2_scale,
bool is_vnni);