[BugFix] Fix top_p,top_k issue with EAGLE and add top_p,top_k in EAGLE e2e (#5131)

### What this PR does / why we need it?
Fixes a dtype issue when applying top_p/top_k sampling constraints with EAGLE speculative decoding (the top_p threshold is now cast to the logits dtype instead of being hard-coded to bfloat16), and adds top_p/top_k sampling coverage to the EAGLE e2e test.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

Signed-off-by: zhaomingyu <zhaomingyu13@h-partners.com>
Author: zhaomingyu13
Date: 2025-12-18 23:07:14 +08:00
Committed by: GitHub
Parent: 073a3a6e6c
Commit: 73e4b4f496
2 changed files with 4 additions and 3 deletions


```diff
@@ -145,7 +145,9 @@ def test_eagle_correctness(
     sampling_params = SamplingParams(
         max_tokens=300,
-        temperature=0.0,
+        temperature=0.8,
+        top_p=0.7,
+        top_k=4,
         ignore_eos=False,
     )
```
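
The test previously ran greedy decoding (temperature=0.0), which never exercises the top_p/top_k path; switching to stochastic sampling does. For reference, a minimal sketch of how such parameters drive a vLLM run with EAGLE speculative decoding; the model names, speculative_config values, and prompt are illustrative placeholders, not the test's actual setup:

```python
# A minimal sketch, assuming a working vLLM install with EAGLE support.
# Model names, speculative_config values, and the prompt are illustrative
# placeholders, not the actual values used by the e2e test.
from vllm import LLM, SamplingParams

sampling_params = SamplingParams(
    max_tokens=300,
    temperature=0.8,  # non-zero temperature enables stochastic sampling
    top_p=0.7,        # nucleus sampling: smallest token set with cum. prob >= 0.7
    top_k=4,          # restrict sampling to the 4 most likely tokens
    ignore_eos=False,
)

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder target model
    speculative_config={
        "method": "eagle",
        "model": "yuhuili/EAGLE-LLaMA3.1-Instruct-8B",  # placeholder draft model
        "num_speculative_tokens": 2,
    },
)
outputs = llm.generate(["The capital of France is"], sampling_params)
print(outputs[0].outputs[0].text)
```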


```diff
@@ -83,8 +83,7 @@ def apply_sampling_constraints(
     if get_ascend_device_type(
     ) != AscendDeviceType._310P and top_p is not None and top_k is not None and 1 <= int(
             top_k.max()) <= 1024:
-        return torch_npu.npu_top_k_top_p(logits, top_p.to(torch.bfloat16),
-                                         top_k)
+        return torch_npu.npu_top_k_top_p(logits, top_p.to(logits.dtype), top_k)
     else:
         # NOTE(woosuk): `apply_top_k_top_p` uses sorting to calculate the mask,
         # which is slow for large vocab sizes. This may cause performance issues.
```
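
The fix itself is the cast `top_p.to(logits.dtype)`: hard-coding bfloat16 can mismatch the logits dtype when logits arrive in float16 or float32, as they can under EAGLE. To illustrate why thresholds should follow the logits dtype, here is a hedged sketch of a sorting-based top-k/top-p filter in plain PyTorch, along the lines of the fallback the NOTE above refers to; the function name is hypothetical and this is not vLLM's exact implementation:

```python
# A minimal sketch of a sorting-based top-k/top-p filter in plain PyTorch,
# illustrating why the thresholds should follow the logits dtype. The
# function name is hypothetical; this is not vLLM's exact implementation.
import torch

def apply_top_k_top_p_sketch(logits: torch.Tensor, top_p: torch.Tensor,
                             top_k: torch.Tensor) -> torch.Tensor:
    # The fix in this commit: derive the threshold dtype from the logits
    # instead of hard-coding bfloat16, so float16/float32 logits also work.
    top_p = top_p.to(logits.dtype)

    sorted_logits, sorted_idx = logits.sort(dim=-1, descending=False)

    # Top-k: mask everything below the k-th largest logit in each row.
    vocab_size = logits.shape[-1]
    boundary = (vocab_size - top_k.to(torch.long)).clamp(min=0).unsqueeze(-1)
    kth_logit = sorted_logits.gather(-1, boundary)
    mask = sorted_logits < kth_logit

    # Top-p: mask tokens outside the smallest nucleus whose cumulative
    # probability reaches top_p (sorted ascending, so the nucleus is the tail).
    cum_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)
    mask |= cum_probs <= (1.0 - top_p).unsqueeze(-1)
    mask[..., -1] = False  # always keep the most likely token

    sorted_logits = sorted_logits.masked_fill(mask, float("-inf"))
    # Scatter the filtered values back to their original vocabulary positions.
    return sorted_logits.scatter(-1, sorted_idx, sorted_logits)

# float16 logits work because the thresholds follow logits.dtype.
logits = torch.randn(2, 32000, dtype=torch.float16)
filtered = apply_top_k_top_p_sketch(logits,
                                    top_p=torch.tensor([0.7, 0.7]),
                                    top_k=torch.tensor([4, 4]))
```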