kv-cache : fix SWA checks + disable cacheless iSWA (#15811)

ggml-ci
Author:       Georgi Gerganov
Date:         2025-09-05 10:39:22 +03:00
Committed by: GitHub
Parent:       5d6688de08
Commit:       c610b6c11b
9 changed files with 29 additions and 11 deletions


@@ -11084,7 +11084,8 @@ struct llm_build_gemma_embedding_iswa : public llm_graph_context {
         // inp_pos - contains the positions
         ggml_tensor * inp_pos = build_inp_pos();
 
-        auto * inp_attn = build_attn_inp_no_cache();
+        // TODO: support cacheless iSWA embeddings [TAG_NO_CACHE_ISWA]
+        auto * inp_attn = build_attn_inp_kv_iswa();
 
         ggml_tensor * inp_out_ids = build_inp_out_ids();
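Note: the cacheless attention input does not yet build the separate sliding-window mask that the interleaved-SWA (iSWA) layers of the Gemma embedding graph need, so the graph temporarily falls back to the KV-cache iSWA input. As a rough illustration of what the missing cacheless path would have to expose (the names below are hypothetical, not llama.cpp API), it needs one mask per attention kind:

```cpp
// Hypothetical sketch only, not the actual llama.cpp API: a cacheless iSWA
// attention input would have to carry two masks, one for the global (non-SWA)
// layers and one for the local sliding-window layers.
struct ggml_tensor; // forward declaration, just to keep the sketch self-contained

struct attn_inp_iswa_sketch {
    ggml_tensor * kq_mask;     // full mask, used by the non-SWA (global) layers
    ggml_tensor * kq_mask_swa; // sliding-window mask, used by the SWA (local) layers
};
```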
@@ -18632,7 +18633,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
         case LLM_ARCH_NOMIC_BERT_MOE:
         case LLM_ARCH_NEO_BERT:
         case LLM_ARCH_WAVTOKENIZER_DEC:
-        case LLM_ARCH_GEMMA_EMBEDDING:
+      //case LLM_ARCH_GEMMA_EMBEDDING: // TODO: disabled until the cacheless SWA logic is fixed [TAG_NO_CACHE_ISWA]
         case LLM_ARCH_DREAM:
         case LLM_ARCH_LLADA:
             {
@@ -18681,6 +18682,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                         /* attn_kv_size */ cparams.n_ctx,
                         /* attn_n_pad */ padding,
                         /* attn_n_swa */ hparams.n_swa,
+                        /* attn_swa_type */ hparams.swa_type,
                         /* recurrent_type_k */ GGML_TYPE_F32,
                         /* recurrent_type_v */ GGML_TYPE_F32,
                         /* recurrent_kv_size */ std::max((uint32_t) 1, cparams.n_seq_max),
@@ -18750,6 +18752,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                         cparams.n_seq_max,
                         padding,
                         hparams.n_swa,
+                        hparams.swa_type,
                         nullptr,
                         nullptr);
                 }
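Note: the memory constructors now receive hparams.swa_type alongside hparams.n_swa, so the cache-side SWA checks know which windowing scheme (if any) applies instead of assuming one. A minimal sketch of such a check for the standard sliding-window variant (illustrative only, not the exact llama.cpp implementation):

```cpp
#include <cstdint>

// Illustrative sketch, not the exact llama.cpp code: decide whether the key at
// position p0 is masked out for the query at position p1 under a standard
// sliding window of size n_swa. Without knowing the SWA type, a cache cannot
// tell which old cells are still needed and which can be pruned or overwritten.
static bool is_masked_swa_sketch(uint32_t n_swa, bool standard_swa, int64_t p0, int64_t p1) {
    if (p0 < 0 || p1 < 0) {
        return true;                      // invalid positions are always masked
    }
    if (!standard_swa || n_swa == 0) {
        return false;                     // no sliding window -> nothing extra to mask
    }
    return (p1 - p0) >= (int64_t) n_swa;  // key falls outside the window ending at p1
}
```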