vulkan: matmul dequantization improvements (#12015)
* faster dequant for old quants * don't use unpack for iq4_nl * vec2 unpack for q8
This commit is contained in:
@@ -92,7 +92,7 @@ float16_t dequantFuncQ8_0(const in decodeBufQ8_0 bl, const in uint blockCoords[2
|
||||
const uint iqs = idx;
|
||||
|
||||
// Load 16b and select the byte for this element
|
||||
int32_t qs = unpack8(int32_t(bl.block.qs[(iqs & 0x1E) >> 1]))[iqs & 1];
|
||||
int32_t qs = unpack8(bl.block.qs[(iqs & 0x1E) >> 1])[iqs & 1];
|
||||
float16_t ret = float16_t(qs) * d;
|
||||
return ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user