kv-cache : fix SWA checks + disable cacheless iSWA (#15811)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-09-05 10:39:22 +03:00
committed by GitHub
parent 5d6688de08
commit c610b6c11b
9 changed files with 29 additions and 11 deletions

View File

@@ -17,6 +17,7 @@ llama_memory_hybrid::llama_memory_hybrid(
uint32_t kv_size,
uint32_t n_pad,
uint32_t n_swa,
llama_swa_type swa_type,
/* recurrent */
ggml_type type_r,
ggml_type type_s,
@@ -40,6 +41,7 @@ llama_memory_hybrid::llama_memory_hybrid(
n_seq_max,
n_pad,
n_swa,
swa_type,
filter_attn == nullptr ?
[&](int32_t il) { return !hparams.is_recurrent(il); }
: filter_attn,