Integrate triton moe kernel (#7689)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
Yuan Luo
2025-07-07 11:05:49 +08:00
committed by GitHub
parent ea3e7ffec7
commit 253454de9b
7 changed files with 697 additions and 54 deletions

View File

@@ -101,6 +101,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
"triton_attention_reduce_in_fp32",
"num_reserved_decode_tokens",
"weight_loader_disable_mmap",
"enable_triton_kernel_moe",
]
# Put some global args for easy access