Add restriction conditions to the ApplyTopPTopK operator (#3254)
### What this PR does / why we need it?
Add restriction conditions to the ApplyTopPTopK operator: 1 <= K <= 1024.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
- vLLM version: v0.10.2
- vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.0

---------

Signed-off-by: SunnyLee219 <3294305115@qq.com>
This commit is contained in:
@@ -29,7 +29,8 @@ class AscendTopKTopPSampler(TopKTopPSampler):
|
|||||||
p: torch.Tensor,
|
p: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
# npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P
|
# npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P
|
||||||
if not is_310p() and p is not None and k is not None:
|
if not is_310p() and p is not None and k is not None and 1 <= int(
|
||||||
|
k.max()) <= 1024:
|
||||||
# npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
|
# npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
|
||||||
return torch_npu.npu_top_k_top_p(logits, p, k)
|
return torch_npu.npu_top_k_top_p(logits, p, k)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user