diff --git a/benchmark/kernels/fused_moe_triton/benchmark_ep_pre_reorder_triton.py b/benchmark/kernels/fused_moe_triton/benchmark_ep_pre_reorder_triton.py index c62424357..89c7ce067 100644 --- a/benchmark/kernels/fused_moe_triton/benchmark_ep_pre_reorder_triton.py +++ b/benchmark/kernels/fused_moe_triton/benchmark_ep_pre_reorder_triton.py @@ -48,6 +48,7 @@ def benchmark_pre_reorder(batch_size, topk, model_config): topk, hidden_size, block_size, + use_per_token_if_dynamic=True, ) for _ in range(10): diff --git a/python/sglang/test/test_block_fp8_ep.py b/python/sglang/test/test_block_fp8_ep.py index ad8a1694d..a89ee1fe3 100644 --- a/python/sglang/test/test_block_fp8_ep.py +++ b/python/sglang/test/test_block_fp8_ep.py @@ -84,6 +84,7 @@ def ep_moe( top_k, hidden_states.shape[1], BLOCK_SIZE=512, + use_per_token_if_dynamic=True, ) seg_indptr_cur_rank = seg_indptr[start_expert_id : end_expert_id + 2]