Support redundant experts in expert parallel (#6461)

2025-05-21 17:05:53 +08:00
parent a071dc4084
commit ccfe5c009d
5 changed files with 18 additions and 5 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -206,6 +206,7 @@ class ModelRunner:
                "speculative_accept_threshold_acc": server_args.speculative_accept_threshold_acc,
                "use_mla_backend": self.use_mla_backend,
                "mm_attention_backend": server_args.mm_attention_backend,
+                "ep_num_redundant_experts": server_args.ep_num_redundant_experts,
            }
        )