Support MHA with chunked prefix cache for flashinfer/flashmla backends; support page size > 1 for MHA chunked prefix (#8616)

Co-authored-by: xuyongfei.xyf <xuyongfei.xyf@antgroup.com>
This commit is contained in:
Yongfei Xu
2025-08-22 09:19:44 +08:00
committed by GitHub
parent 704ced1b2e
commit 9708d353b7
6 changed files with 184 additions and 75 deletions

View File

@@ -106,6 +106,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
"enable_symm_mem",
"quantization",
"enable_custom_logit_processor",
"disaggregation_mode",
]
# Put some global args for easy access