metal : FA support F32 K and V and head size = 32 (#16531)

* metal : FA support F32 K and V and head size = 32

* graph : remove obsolete comment [no ci]
This commit is contained in:
Georgi Gerganov
2025-10-13 23:07:57 +03:00
committed by GitHub
parent e38b7c6e9e
commit e60f241eac
4 changed files with 106 additions and 52 deletions

View File

@@ -693,7 +693,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
return true;
case GGML_OP_FLASH_ATTN_EXT:
// for new head sizes, add checks here
-if (op->src[0]->ne[0] != 40 &&
+if (op->src[0]->ne[0] != 32 &&
+op->src[0]->ne[0] != 40 &&
op->src[0]->ne[0] != 64 &&
op->src[0]->ne[0] != 80 &&
op->src[0]->ne[0] != 96 &&