Fixed 310p failure when using the sampler feature (#2151)
### What this PR does / why we need it?
Fixes a failure on 310P devices when the sampler feature is used.
Root cause: torch_npu.npu_top_k_top_p uses the operator
aclnnApplyTopKTopP, and aclnnApplyTopKTopP does not currently support
310P.
The issue was first introduced by PR #1308.
### Does this PR introduce _any_ user-facing change?
No
- vLLM version: v0.10.0
- vLLM main: 207b750e19
Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
@@ -3,6 +3,8 @@ import torch_npu
|
|||||||
from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
|
from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
|
||||||
from vllm.v1.sample.sampler import Sampler
|
from vllm.v1.sample.sampler import Sampler
|
||||||
|
|
||||||
|
from vllm_ascend.utils import is_310p
|
||||||
|
|
||||||
|
|
||||||
class AscendSampler(Sampler):
|
class AscendSampler(Sampler):
|
||||||
|
|
||||||
@@ -20,7 +22,8 @@ class AscendTopKTopPSampler(TopKTopPSampler):
|
|||||||
k: torch.Tensor,
|
k: torch.Tensor,
|
||||||
p: torch.Tensor,
|
p: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
if p is not None and k is not None:
|
# npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P
|
||||||
|
if not is_310p() and p is not None and k is not None:
|
||||||
# npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
|
# npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
|
||||||
return torch_npu.npu_top_k_top_p(logits, p, k)
|
return torch_npu.npu_top_k_top_p(logits, p, k)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user