From c62f346f5d452f81f7b6ba46e9a13c2c1e270c93 Mon Sep 17 00:00:00 2001 From: leo-pony Date: Fri, 1 Aug 2025 08:43:08 +0800 Subject: [PATCH] Fixed 310p failure when using the sampler feature (#2151) ### What this PR does / why we need it? Fixes the failure on 310P when using the sampler feature. Root cause: torch_npu.npu_top_k_top_p uses the operator aclnnApplyTopKTopP, which currently does not support 310P, so that call is now skipped when running on 310P. The issue was first introduced by PR #1308. ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/207b750e194829c4bcd4df0450f5f93d71755dae Signed-off-by: leo-pony --- vllm_ascend/sample/sampler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/sample/sampler.py b/vllm_ascend/sample/sampler.py index 862bd03..c082f98 100644 --- a/vllm_ascend/sample/sampler.py +++ b/vllm_ascend/sample/sampler.py @@ -3,6 +3,8 @@ import torch_npu from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample from vllm.v1.sample.sampler import Sampler +from vllm_ascend.utils import is_310p + class AscendSampler(Sampler): @@ -20,7 +22,8 @@ class AscendTopKTopPSampler(TopKTopPSampler): k: torch.Tensor, p: torch.Tensor, ) -> torch.Tensor: - if p is not None and k is not None: + # npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P + if not is_310p() and p is not None and k is not None: # npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p) return torch_npu.npu_top_k_top_p(logits, p, k)