Support MHA with chunked prefix cache for flashinfer/flashmla backend, support page size > 1 for MHA chunked prefix (#8616)
Co-authored-by: xuyongfei.xyf <xuyongfei.xyf@antgroup.com>
This commit is contained in:
@@ -106,6 +106,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
|
||||
"enable_symm_mem",
|
||||
"quantization",
|
||||
"enable_custom_logit_processor",
|
||||
"disaggregation_mode",
|
||||
]
|
||||
|
||||
# Put some global args for easy access
|
||||
|
||||
Reference in New Issue
Block a user