metal : fuse NORM + MUL + ADD, support non-multiples of 4 (#16220)

* metal : fuse NORM + MUL + ADD

* metal : support norms of non-multiple of 4

* cont : fix comment [no ci]
This commit is contained in:
Georgi Gerganov
2025-09-25 11:30:16 +03:00
committed by GitHub
parent 4ea00794b8
commit dfcd53f7ec
9 changed files with 206 additions and 232 deletions

View File

@@ -60,7 +60,6 @@ int ggml_metal_op_mul_mat_id (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_add_id (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_flash_attn_ext (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_bin (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_rms_norm (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_l2_norm (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_group_norm (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_norm (ggml_metal_op_t ctx, int idx);