Fixed 310p failure when using the sampler feature (#2151)

### What this PR does / why we need it?
Fixes the failure on 310P when using the sampler feature. Root cause: `torch_npu.npu_top_k_top_p` uses the operator `aclnnApplyTopKTopP`, which currently does not support 310P. The issue was first introduced by PR #1308.

### Does this PR introduce _any_ user-facing change?
No

- vLLM version: v0.10.0
- vLLM main: 207b750e19

Signed-off-by: leo-pony <nengjunma@outlook.com>
2025-08-01 08:43:08 +08:00
parent 86bdde1ca8
commit c62f346f5d
1 changed files with 4 additions and 1 deletions
--- a/vllm_ascend/sample/sampler.py
+++ b/vllm_ascend/sample/sampler.py
@@ -3,6 +3,8 @@ import torch_npu
 from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
 from vllm.v1.sample.sampler import Sampler

+from vllm_ascend.utils import is_310p
+

 class AscendSampler(Sampler):

@@ -20,7 +22,8 @@ class AscendTopKTopPSampler(TopKTopPSampler):
        k: torch.Tensor,
        p: torch.Tensor,
    ) -> torch.Tensor:
-        if p is not None and k is not None:
+        # npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P
+        if not is_310p() and p is not None and k is not None:
            # npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
            return torch_npu.npu_top_k_top_p(logits, p, k)