vulkan: matmul dequantization improvements (#12015)

* faster dequant for old quants
* don't use unpack for iq4_nl
* vec2 unpack for q8
2025-02-28 07:20:08 +00:00
parent 581650b7ca
commit fbeda9002d
5 changed files with 93 additions and 53 deletions
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
@@ -92,7 +92,7 @@ float16_t dequantFuncQ8_0(const in decodeBufQ8_0 bl, const in uint blockCoords[2
    const uint iqs = idx;

    // Load 16b and select the byte for this element
-    int32_t qs = unpack8(int32_t(bl.block.qs[(iqs & 0x1E) >> 1]))[iqs & 1];
+    int32_t qs = unpack8(bl.block.qs[(iqs & 0x1E) >> 1])[iqs & 1];
    float16_t ret = float16_t(qs) * d;
    return ret;
 }