Fix EAGLE 2 non-greedy case (#3407)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
Yineng Zhang
2025-02-09 07:28:34 +08:00
committed by GitHub
parent f90db8bc07
commit fad315cb8e
4 changed files with 71 additions and 22 deletions

View File

@@ -54,7 +54,9 @@ def get_model_config(model_name: str, tp_size: int):
):
block_shape = config.quantization_config["weight_block_size"]
assert len(block_shape) == 2
assert vllm_version_num >= 66, "Block-wise quantized fp8 fused_moe is only supported for VLLM>=0.6.6.post1"
assert (
vllm_version_num >= 66
), "Block-wise quantized fp8 fused_moe is only supported for VLLM>=0.6.6.post1"
shape_configs = {
"num_experts": E,