Add fp4 quantize before all-gather for Flashinfer cutlass MoE DP (max throughput) (#7667)
This commit is contained in:
@@ -84,6 +84,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
|
||||
"chunked_prefill_size",
|
||||
"device",
|
||||
"disable_chunked_prefix_cache",
|
||||
"disable_flashinfer_cutlass_moe_fp4_allgather",
|
||||
"disable_radix_cache",
|
||||
"enable_dp_lm_head",
|
||||
"enable_flashinfer_allreduce_fusion",
|
||||
|
||||
Reference in New Issue
Block a user