Support MHA with chunked prefix cache for DeepSeek chunked prefill (#5113)

This commit is contained in:
Baizhou Zhang
2025-04-15 22:01:22 -07:00
committed by GitHub
parent dd83e7e9c3
commit a42736bbb8
10 changed files with 734 additions and 46 deletions

View File

@@ -83,6 +83,7 @@ global_server_args_dict = {
"chunked_prefill_size": ServerArgs.chunked_prefill_size,
"n_share_experts_fusion": ServerArgs.n_share_experts_fusion,
"disable_shared_experts_fusion": ServerArgs.disable_shared_experts_fusion,
"disable_chunked_prefix_cache": ServerArgs.disable_chunked_prefix_cache,
}
logger = logging.getLogger(__name__)