Add fp4 quantize before all-gather for Flashinfer cutlass MoE DP (max throughput) (#7667)

This commit is contained in:
Trevor Morris
2025-08-15 22:08:11 -07:00
committed by GitHub
parent 87dab54824
commit eff4eb3fdd
16 changed files with 360 additions and 52 deletions

View File

@@ -84,6 +84,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
"chunked_prefill_size",
"device",
"disable_chunked_prefix_cache",
"disable_flashinfer_cutlass_moe_fp4_allgather",
"disable_radix_cache",
"enable_dp_lm_head",
"enable_flashinfer_allreduce_fusion",