vulkan: support softmax/FA batch and broadcast (#14449)
This commit is contained in:
committed by
Georgi Gerganov
parent
ec68e84c32
commit
8875523eb3
@@ -6,6 +6,14 @@ layout (push_constant) uniform parameter
|
||||
{
|
||||
uint KX;
|
||||
uint KY;
|
||||
uint ne00;
|
||||
uint ne01;
|
||||
uint ne02;
|
||||
uint ne12;
|
||||
uint ne13;
|
||||
uint nb11;
|
||||
uint nb12;
|
||||
uint nb13;
|
||||
float scale;
|
||||
float max_bias;
|
||||
float m0;
|
||||
@@ -31,7 +39,15 @@ shared FLOAT_TYPE vals[BLOCK_SIZE];
|
||||
void soft_max(uint num_iters) {
|
||||
const uint tid = gl_LocalInvocationID.x;
|
||||
const uint rowx = gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x;
|
||||
const uint rowy = (p.KY > 0) ? (rowx % p.KY) : 0;
|
||||
|
||||
const uint32_t i03 = rowx / (p.ne01 * p.ne02);
|
||||
const uint32_t i02 = (rowx - i03 * p.ne01 * p.ne02) / p.ne01;
|
||||
const uint32_t i01 = rowx % p.ne01;
|
||||
|
||||
uint rowy_start = 0;
|
||||
if (p.KY > 0) {
|
||||
rowy_start = i01 * p.nb11 + (i02 % p.ne12) * p.nb12 + (i03 % p.ne13) * p.nb13;
|
||||
}
|
||||
|
||||
if (rowx >= p.nrows_x) {
|
||||
return;
|
||||
@@ -41,7 +57,7 @@ void soft_max(uint num_iters) {
|
||||
|
||||
// ALiBi
|
||||
if (p.max_bias > 0.0f) {
|
||||
const uint h = rowx/p.KY; // head index
|
||||
const uint h = (rowx / p.ne01) % p.ne02; // head index
|
||||
|
||||
const float base = h < p.n_head_log2 ? p.m0 : p.m1;
|
||||
const uint exp = h < p.n_head_log2 ? h + 1 : 2*(h - p.n_head_log2) + 1;
|
||||
@@ -67,7 +83,7 @@ void soft_max(uint num_iters) {
|
||||
|
||||
FLOAT_TYPE b = FLOAT_TYPE(0);
|
||||
if (p.KY > 0 && col < p.KX) {
|
||||
b = data_b[rowy * p.KX + col];
|
||||
b = data_b[rowy_start + col];
|
||||
}
|
||||
|
||||
FLOAT_TYPE v = a * p.scale + slope * b;
|
||||
@@ -111,7 +127,7 @@ void soft_max(uint num_iters) {
|
||||
if (idx < DATA_CACHE_SIZE) {
|
||||
val = exp(data_cache[idx] - max_val);
|
||||
} else {
|
||||
val = exp(FLOAT_TYPE(data_a[i]) * p.scale + (p.KY > 0 ? slope * FLOAT_TYPE(data_b[rowy * p.KX + col]) : FLOAT_TYPE(0.0f)) - max_val);
|
||||
val = exp(FLOAT_TYPE(data_a[i]) * p.scale + (p.KY > 0 ? slope * FLOAT_TYPE(data_b[rowy_start + col]) : FLOAT_TYPE(0.0f)) - max_val);
|
||||
}
|
||||
sum += val;
|
||||
if (idx < DATA_CACHE_SIZE) {
|
||||
|
||||
Reference in New Issue
Block a user