Optimize prefill performance on cpu backend (#8750)
This commit is contained in:
@@ -27,10 +27,10 @@ template <>
|
||||
inline bool can_use_brgemm<at::Half>(int M) {
|
||||
return true;
|
||||
}
|
||||
// TODO: add u8s8 brgemm, this requires PyTorch 2.7
|
||||
// this requires PyTorch 2.7 or above
|
||||
template <>
|
||||
inline bool can_use_brgemm<int8_t>(int M) {
|
||||
return false;
|
||||
return M > 4;
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -198,4 +198,5 @@ void tinygemm_kernel(
|
||||
int64_t ldb,
|
||||
int64_t ldc,
|
||||
bool brg,
|
||||
int64_t block_size_K);
|
||||
int64_t block_size_K,
|
||||
bool do_unpack = true);
|
||||
|
||||
Reference in New Issue
Block a user