Fix EAGLE 2 non-greedy case (#3407)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
Yineng Zhang
2025-02-09 07:28:34 +08:00
committed by GitHub
parent f90db8bc07
commit fad315cb8e
4 changed files with 71 additions and 22 deletions

View File

@@ -54,7 +54,9 @@ def get_model_config(model_name: str, tp_size: int):
):
block_shape = config.quantization_config["weight_block_size"]
assert len(block_shape) == 2
assert vllm_version_num >= 66, "Block-wise quantized fp8 fused_moe is only supported for VLLM>=0.6.6.post1"
assert (
vllm_version_num >= 66
), "Block-wise quantized fp8 fused_moe is only supported for VLLM>=0.6.6.post1"
shape_configs = {
"num_experts": E,