feat: support flashinfer mla attention for deepseek v3 (#3550)

This commit is contained in:
Yineng Zhang
2025-02-14 08:50:14 +08:00
committed by GitHub
parent 368de3661e
commit 70f894b810
12 changed files with 299 additions and 135 deletions

View File

@@ -65,6 +65,7 @@ global_server_args_dict = {
"enable_dp_attention": ServerArgs.enable_dp_attention,
"enable_ep_moe": ServerArgs.enable_ep_moe,
"device": ServerArgs.device,
"enable_flashinfer_mla": ServerArgs.enable_flashinfer_mla,
}
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)