vulkan: clamp matmul and FA results to the max finite value (#15652)

* vulkan: clamp matmul and FA results to the max finite value

* only clamp for fp16
This commit is contained in:
Jeff Bolz
2025-08-31 01:27:57 -05:00
committed by GitHub
parent 4d74393bcc
commit 94e82c7ead
7 changed files with 58 additions and 8 deletions

View File

@@ -283,6 +283,10 @@ void main() {
O = Ldiag*O;
#if defined(ACC_TYPE_MAX)
[[unroll]] for (uint i = 0; i < O.length(); ++i) { O[i] = clamp(O[i], -ACC_TYPE_MAX, ACC_TYPE_MAX); }
#endif
uint32_t o_offset = iq3*p.ne2*p.ne1*HSV;
coopmat<D_TYPE, gl_ScopeWorkgroup, Br, HSV_pad, gl_MatrixUseAccumulator> O_D = coopmat<D_TYPE, gl_ScopeWorkgroup, Br, HSV_pad, gl_MatrixUseAccumulator>(O);