[BugFix] Fix top_p,top_k issue with EAGLE and add top_p,top_k in EAGLE e2e (#5131)
### What this PR does / why we need it?
Add top_p,top_k in EAGLE e2e
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: zhaomingyu <zhaomingyu13@h-partners.com>
This commit is contained in:
@@ -145,7 +145,9 @@ def test_eagle_correctness(

     sampling_params = SamplingParams(
         max_tokens=300,
-        temperature=0.0,
+        temperature=0.8,
+        top_p=0.7,
+        top_k=4,
         ignore_eos=False,
     )

@@ -83,8 +83,7 @@ def apply_sampling_constraints(

         if get_ascend_device_type(
         ) != AscendDeviceType._310P and top_p is not None and top_k is not None and 1 <= int(
                 top_k.max()) <= 1024:
-            return torch_npu.npu_top_k_top_p(logits, top_p.to(torch.bfloat16),
-                                             top_k)
+            return torch_npu.npu_top_k_top_p(logits, top_p.to(logits.dtype), top_k)
     else:
         # NOTE(woosuk): `apply_top_k_top_p` uses sorting to calculate the mask,
         # which is slow for large vocab sizes. This may cause performance issues.

Reference in New Issue
Block a user