kv-cache : support layer reuse (#15504)
* kv-cache : support layer reuse ggml-ci
* cont : update comments [no ci]
This commit is contained in:
@@ -15,18 +15,14 @@
|
||||
// see the implementation of llama_kv_cache_context_i for an example of how to do it
|
||||
class llama_memory_recurrent : public llama_memory_i {
|
||||
public:
|
||||
|
||||
// this callback is used to filter out layers that should not be included in the cache
|
||||
using layer_filter_cb = std::function<bool(int32_t il)>;
|
||||
|
||||
llama_memory_recurrent(
|
||||
const llama_model & model,
|
||||
layer_filter_cb && filter,
|
||||
ggml_type type_r,
|
||||
ggml_type type_s,
|
||||
bool offload,
|
||||
uint32_t mem_size,
|
||||
uint32_t n_seq_max);
|
||||
const llama_model & model,
|
||||
ggml_type type_r,
|
||||
ggml_type type_s,
|
||||
bool offload,
|
||||
uint32_t mem_size,
|
||||
uint32_t n_seq_max,
|
||||
const layer_filter_cb & filter);
|
||||
|
||||
~llama_memory_recurrent() = default;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user