Pass a_scale from the fp8 quant result instead of hard-coding it to 1.0f (#10241)

Co-authored-by: Yichen Wang <yichen.wang@bytedance.com>
Co-authored-by: Jinwu Guo <641876696@qq.com>
2025-09-10 12:56:05 -07:00
parent 91b3555d2d
commit 2286e85e77
3 changed files with 34 additions and 29 deletions
--- a/sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh
+++ b/sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh
@@ -209,7 +209,7 @@ void cutlass_w4a8_group_gemm_caller(

  Args arguments;
  decltype(arguments.epilogue.thread) fusion_args;
-  fusion_args.alpha = 1.0f;
+  fusion_args.alpha = 0;
  fusion_args.beta = 0;
  fusion_args.alpha_ptr = a_scales.data_ptr<float>();
  ;