Files
enginex-ascend-910-llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp
Acly e29acf74fe vulkan : incremental shader builds (#16341)
* vulkan (DRAFT): split shader generation by GLSL source file, to improve incremental build times

* support dep-files so shaders are recompiled if their included files change

* rename shader files which are used as "headers" to use .glsl extension
* move glslc extension detection shaders to separate folders
* the above is to prevent them from getting glob'd with the actual compute shaders that need to be compiled

* vulkan : only write embedded shader .hpp/.cpp when they change

* avoid recompiling ggml-vulkan.cpp when editing shaders
* pass single --source argument instead of --input-dir & --filter to shader gen
* check for source file match earlier

* fix hang in vulkan-shaders-gen when there are compilation errors

* early out did not decrement compile_count

* clean up

* fix glslc integer dot product test

* unconditionally write the embedded shader cpp output

* replace output filepath in generated dep-files to match output in CMakeLists

---------

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
2025-10-04 11:42:56 +02:00

75 lines
1.7 KiB
Plaintext

#version 450
// 2D pooling compute shader: each invocation produces one output element by
// reducing (max or average) a rectangular window of the input buffer.
#include "types.glsl"
// NOTE(review): presumably required so A_TYPE / D_TYPE may be 16-bit storage
// types; both macros are defined outside this file -- confirm against the
// shader generator.
#extension GL_EXT_shader_16bit_storage : require
// Push constants supplied by the host for one dispatch.
layout(push_constant) uniform parameter {
// Input plane extent: IW elements per row, IH rows per plane.
uint IW; uint IH;
// Output plane extent: OW elements per row, OH rows per plane.
uint OW; uint OH;
// Not read by the code visible in this file.
uint OC;
// Number of output elements to compute; invocations with a global index at or
// beyond this value exit immediately.
uint pelements;
// Reduction selector, compared against OP_POOL_MAX / OP_POOL_AVG below.
uint op;
// Window extent (k), step between consecutive windows (s) and leading offset
// subtracted from the window start (p). In main() the *0 members are applied
// along the row (height) axis and the *1 members along the column (width) axis.
int k0; int k1;
int s0; int s1;
int p0; int p1;
} p;
// Invocations per workgroup along x.
#define BLOCK_SIZE 512
// Largest finite 32-bit float; its negation seeds the running maximum.
#define FLT_MAX 3.402823466e+38F
// Values of p.op understood by main(); any other value makes main() return
// without writing an output.
#define OP_POOL_MAX 0u
#define OP_POOL_AVG 1u
layout (local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;
// Source elements, read as consecutive planes of IH * IW elements.
layout(binding = 0) readonly buffer X {A_TYPE data_a[];};
// Destination elements, written as consecutive planes of OH * OW elements.
layout(binding = 1) writeonly buffer D {D_TYPE data_d[];};
// Computes one pooled output element per invocation.
//
// The flat global invocation index is decomposed into a plane index (nc) and
// an output row / column (cur_oh, cur_ow). The pooling window for that output
// starts at (cur_oh * s0 - p0, cur_ow * s1 - p1) in the input plane and spans
// k0 rows by k1 columns; only the part of the window that overlaps the input
// plane [0, IH) x [0, IW) is read.
//
//  * OP_POOL_MAX: result is the maximum of the in-bounds taps, or -FLT_MAX if
//    the window does not overlap the input at all.
//  * OP_POOL_AVG: result is the sum of the in-bounds taps divided by the full
//    window size k0 * k1 (out-of-bounds taps therefore count as zero).
//  * Any other p.op: the invocation returns without writing.
void main() {
    const uint idx = gl_GlobalInvocationID.x;
    if (idx >= p.pelements) {
        return;
    }

    const uint O_HW = p.OW * p.OH;
    const uint nc = idx / O_HW;
    const uint cur_oh = (idx % O_HW) / p.OW;
    const uint cur_ow = (idx % O_HW) % p.OW;

    // Window bounds are clamped in *signed* arithmetic before converting to
    // uint. Mixing int and uint in min() would implicitly convert a negative
    // window end (window entirely before the input) to a huge unsigned value,
    // which min() would then replace with the full input extent, causing the
    // whole plane to be pooled instead of an empty range.
    const int start_h = int(cur_oh) * p.s0 - p.p0;
    const uint bh = uint(max(start_h, 0));
    const uint eh = uint(max(min(start_h + p.k0, int(p.IH)), 0));

    const int start_w = int(cur_ow) * p.s1 - p.p1;
    const uint bw = uint(max(start_w, 0));
    const uint ew = uint(max(min(start_w + p.k1, int(p.IW)), 0));

    // Averaging always divides by the nominal window size, not by the number
    // of in-bounds taps.
    const float scale = 1.0 / float(p.k0 * p.k1);

    float res;
    if (p.op == OP_POOL_AVG) {
        res = 0.0;
    } else if (p.op == OP_POOL_MAX) {
        res = -FLT_MAX;
    } else {
        return;
    }

    // Offset of the first element of this invocation's input plane.
    const uint plane_base = nc * p.IH * p.IW;

    #pragma unroll
    for (uint i = bh; i < eh; i++) {
        const uint row_base = plane_base + i * p.IW;
        #pragma unroll
        for (uint j = bw; j < ew; j++) {
            const float cur = D_TYPE(data_a[row_base + j]);
            if (p.op == OP_POOL_AVG) {
                res += cur * scale;
            } else if (p.op == OP_POOL_MAX) {
                res = max(res, cur);
            }
        }
    }

    data_d[nc * O_HW + cur_oh * p.OW + cur_ow] = res;
}