ggml : add asserts for type conversion in fattn kernels (#9971)

ggml-ci
This commit is contained in:
Georgi Gerganov
2024-10-21 16:20:46 +03:00
committed by GitHub
parent d5ebd79c76
commit f594bc80ba
3 changed files with 8 additions and 4 deletions

View File

@@ -19243,7 +19243,7 @@ struct llama_context * llama_new_context_with_model(
params.flash_attn = false;
}
-        if (params.type_v != GGML_TYPE_F16 && !params.flash_attn) {
+        if (ggml_is_quantized(params.type_v) && !params.flash_attn) {
LLAMA_LOG_ERROR("%s: V cache quantization requires flash_attn\n", __func__);
return nullptr;
}