Support redundant experts in expert parallel (#6461)

This commit is contained in:
fzyzcjy
2025-05-21 17:05:53 +08:00
committed by GitHub
parent a071dc4084
commit ccfe5c009d
5 changed files with 18 additions and 5 deletions

View File

@@ -206,6 +206,7 @@ class ModelRunner:
"speculative_accept_threshold_acc": server_args.speculative_accept_threshold_acc,
"use_mla_backend": self.use_mla_backend,
"mm_attention_backend": server_args.mm_attention_backend,
"ep_num_redundant_experts": server_args.ep_num_redundant_experts,
}
)