From 69cc99d0046754fd1eff4661c94a742b8c487a80 Mon Sep 17 00:00:00 2001 From: LeeWenquan <83354342+SunnyLee151064@users.noreply.github.com> Date: Mon, 29 Sep 2025 14:04:58 +0800 Subject: [PATCH] Add restriction conditions to the ApplyTopPTopK operator (#3254) ### What this PR does / why we need it? Add restriction conditions to the ApplyTopPTopK operator: 1 <= K <= 1024 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.0 --------- Signed-off-by: SunnyLee219 <3294305115@qq.com> --- vllm_ascend/sample/sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/sample/sampler.py b/vllm_ascend/sample/sampler.py index 6a5c130..9cceda6 100644 --- a/vllm_ascend/sample/sampler.py +++ b/vllm_ascend/sample/sampler.py @@ -29,7 +29,8 @@ class AscendTopKTopPSampler(TopKTopPSampler): p: torch.Tensor, ) -> torch.Tensor: # npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P - if not is_310p() and p is not None and k is not None: + if not is_310p() and p is not None and k is not None and 1 <= int( + k.max()) <= 1024: # npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p) return torch_npu.npu_top_k_top_p(logits, p, k)