Support redundant experts in expert parallel (#6461)
This commit is contained in:
@@ -206,6 +206,7 @@ class ModelRunner:
|
||||
"speculative_accept_threshold_acc": server_args.speculative_accept_threshold_acc,
|
||||
"use_mla_backend": self.use_mla_backend,
|
||||
"mm_attention_backend": server_args.mm_attention_backend,
|
||||
"ep_num_redundant_experts": server_args.ep_num_redundant_experts,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user