[CPU] Optimize FP16 decode_attention_cpu (#10652)

This commit is contained in:
blzheng
2025-10-23 12:39:51 +08:00
committed by GitHub
parent 81fd2b0ee0
commit 13fb8b5489
4 changed files with 181 additions and 9 deletions

View File

@@ -47,7 +47,7 @@ convert_from_float_ext<at::BFloat16>(const Vectorized<float>& a, const Vectorize
// Widen 16 packed bf16 lanes to fp32: zero-extend each u16 into a u32 lane,
// shift left by 16 so the bf16 bits land in the high half of the fp32
// pattern, then reinterpret the integer vector as floats.
#define CVT_BF16_TO_FP32(a) _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(a), 16))
// Widen 16 packed fp16 lanes to fp32. Note: _mm512_cvtph_ps is the
// half->single direction; the earlier duplicate definition mistakenly used
// _mm512_cvtps_ph (single->half) and has been removed — a second #define of
// the same name would also have silently shadowed the first.
#define CVT_FP16_TO_FP32(a) _mm512_cvtph_ps(a)
// this doesn't handle NaN.
inline __m512bh cvt_e4m3_bf16_intrinsic_no_nan(__m256i fp8_vec) {