kv-cache : support layer reuse (#15504)
* kv-cache : support layer reuse ggml-ci
* cont : update comments [no ci]
This commit is contained in:
@@ -15,18 +15,14 @@
|
||||
// see the implementation of llama_kv_cache_context_i for an example of how to do it
|
||||
class llama_memory_recurrent : public llama_memory_i {
|
||||
public:
|
||||
|
||||
// this callback is used to filter out layers that should not be included in the cache
|
||||
using layer_filter_cb = std::function<bool(int32_t il)>;
|
||||
|
||||
llama_memory_recurrent(
|
||||
const llama_model & model,
|
||||
layer_filter_cb && filter,
|
||||
ggml_type type_r,
|
||||
ggml_type type_s,
|
||||
bool offload,
|
||||
uint32_t mem_size,
|
||||
uint32_t n_seq_max);
|
||||
const llama_model & model,
|
||||
ggml_type type_r,
|
||||
ggml_type type_s,
|
||||
bool offload,
|
||||
uint32_t mem_size,
|
||||
uint32_t n_seq_max,
|
||||
const layer_filter_cb & filter);
|
||||
|
||||
~llama_memory_recurrent() = default;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user