Optimize prefill performance on CPU backend (#8750)

This commit is contained in:
Ma Mingfei
2025-08-29 08:21:55 +08:00
committed by GitHub
parent 9f81d741a2
commit 5ad296bda1
9 changed files with 680 additions and 273 deletions

View File

@@ -100,8 +100,7 @@ void segment_gemm_kernel_impl(
const int64_t NB1 = div_up(N1, BLOCK_N);
const int64_t NB = NB0 + NB1;
// TODO: brgemm u8s8 depends on PyTorch 2.7 release.
const bool use_brgemm = false;
const bool use_brgemm = can_use_brgemm<int8_t>(M);
// K + 4 after compensation
const int64_t packed_row_size = get_row_size<int8_t>(K);