kv-cache : drop the "unified" prefix (#15467)

* kv-cache : drop the "unified" prefix

ggml-ci

* cont : fix comment [no ci]
2025-08-21 17:00:33 +03:00
parent ad294df03f
commit 715a6db02c
15 changed files with 346 additions and 360 deletions
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -30,7 +30,7 @@ llama_memory_hybrid::llama_memory_hybrid(
      layer_filter_cb && filter_attn,
      layer_filter_cb && filter_recr) :
    hparams(model.hparams),
-    mem_attn(new llama_kv_cache_unified(
+    mem_attn(new llama_kv_cache(
        model,
        filter_attn == nullptr ?
            [&](int32_t il) { return !hparams.is_recurrent(il); }
@@ -179,7 +179,7 @@ void llama_memory_hybrid::state_read(llama_io_read_i & io, llama_seq_id seq_id,
    mem_recr->state_read(io, seq_id);
 }

-llama_kv_cache_unified * llama_memory_hybrid::get_mem_attn() const {
+llama_kv_cache * llama_memory_hybrid::get_mem_attn() const {
    return mem_attn.get();
 }

@@ -210,7 +210,7 @@ llama_memory_hybrid_context::llama_memory_hybrid_context(
        std::vector<llama_ubatch>   ubatches) :
    ubatches(std::move(ubatches)),
    // note: here we copy the ubatches. not sure if this is ideal
-    ctx_attn(new llama_kv_cache_unified_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)),
+    ctx_attn(new llama_kv_cache_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)),
    ctx_recr(new llama_memory_recurrent_context(mem->get_mem_recr(),                        this->ubatches)),
    status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) {
 }
@@ -248,8 +248,8 @@ const llama_ubatch & llama_memory_hybrid_context::get_ubatch() const {
    return ubatches[i_next];
 }

-const llama_kv_cache_unified_context * llama_memory_hybrid_context::get_attn() const {
-    return static_cast<const llama_kv_cache_unified_context *>(ctx_attn.get());
+const llama_kv_cache_context * llama_memory_hybrid_context::get_attn() const {
+    return static_cast<const llama_kv_cache_context *>(ctx_attn.get());
 }

 const llama_memory_recurrent_context * llama_memory_hybrid_context::get_recr() const {