Add DeepSeek V3/R1 shared experts fusion (#4918)

This commit is contained in:
Xiaoyu Zhang
2025-04-04 16:59:29 +08:00
committed by GitHub
parent 6ff9c6a5e7
commit 924ca7c92c
14 changed files with 536 additions and 36 deletions

View File

@@ -144,7 +144,7 @@ def moe_align_block_size_triton(
[32, 64, 128, 256], # block_size
[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096], # num_tokens
[1, 2, 4, 8, 16, 32, 64], # topk
[64, 160, 256], # num_experts
[64, 160, 256, 257, 260, 264], # num_experts
)
),
)