[Feature] Support DeepEP Low Latency (#4767)

Co-authored-by: sleepcoo <sleepcoo@gmail.com>
Co-authored-by: laixinn <xielx@shanghaitech.edu.cn>
Co-authored-by: ch-wan <cwan39@gatech.edu>
This commit is contained in:
Jinyan Chen
2025-04-02 00:23:25 +08:00
committed by GitHub
parent 87fafa0105
commit 23c764b18a
8 changed files with 448 additions and 238 deletions

View File

@@ -72,6 +72,7 @@ global_server_args_dict = {
"enable_dp_attention": ServerArgs.enable_dp_attention,
"enable_ep_moe": ServerArgs.enable_ep_moe,
"enable_deepep_moe": ServerArgs.enable_deepep_moe,
"deepep_mode": ServerArgs.deepep_mode,
"device": ServerArgs.device,
"speculative_accept_threshold_single": ServerArgs.speculative_accept_threshold_single,
"speculative_accept_threshold_acc": ServerArgs.speculative_accept_threshold_acc,