vulkan: mul_mat_id coopmat2 optimizations (#15546)
* vulkan: mul_mat_id coopmat2 optimizations Add a path for when the tile fits in BN/2, similar to what we have for mul_mat. Only call fetch_scales/store_scales once per QUANT_K block, and once at the beginning in case start_k is not aligned. * Also add a path for BN/4 - worth a couple more percent
This commit is contained in:
@@ -2225,7 +2225,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
||||
s_mmq_wg_denoms_k = { 32, 64, 1 };
|
||||
|
||||
// spec constants and tile sizes for quant matmul_id
|
||||
l_warptile_mmqid = { 256, 128, 128, 16, 0, device->subgroup_size };
|
||||
l_warptile_mmqid = { 256, 128, 128, 16, 1, device->subgroup_size };
|
||||
m_warptile_mmqid = { 256, 128, 64, 16, 0, device->subgroup_size };
|
||||
s_warptile_mmqid = { 256, 128, 64, 16, 0, device->subgroup_size };
|
||||
l_mmqid_wg_denoms = { 128, 128, 1 };
|
||||
|
||||
Reference in New Issue
Block a user