Pass a_scale from the fp8 quantization result instead of hard-coding it to 1.0f (#10241)

Co-authored-by: Yichen Wang <yichen.wang@bytedance.com>
Co-authored-by: Jinwu Guo <641876696@qq.com>
This commit is contained in:
Rain Jiang
2025-09-10 12:56:05 -07:00
committed by GitHub
parent 91b3555d2d
commit 2286e85e77
3 changed files with 34 additions and 29 deletions

View File

@@ -209,7 +209,7 @@ void cutlass_w4a8_group_gemm_caller(
Args arguments;
decltype(arguments.epilogue.thread) fusion_args;
fusion_args.alpha = 1.0f;
fusion_args.alpha = 0;
fusion_args.beta = 0;
fusion_args.alpha_ptr = a_scales.data_ptr<float>();
;