From aa7fb5d7073b70b35445ad87bcc1fbf6aca34df1 Mon Sep 17 00:00:00 2001 From: Zhu Yi Lin <116337067+GDzhu01@users.noreply.github.com> Date: Wed, 25 Feb 2026 23:02:00 +0800 Subject: [PATCH] [Bugfix] Fix DeepseekV3.1 Accuracy issue (#6805) ### What this PR does / why we need it? To adapt to the GLM model, rejection sampling was changed to receive logits and apply softmax itself, which can cause accuracy issues on vLLM v0.15.0. This patch skips that softmax when running on vLLM 0.15.0 and uses the incoming tensor directly as the target probabilities. - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/83b47f67b1dfad505606070ae4d9f83e50ad4ebd Signed-off-by: GDzhu01 <809721801@qq.com> --- vllm_ascend/sample/rejection_sampler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/sample/rejection_sampler.py b/vllm_ascend/sample/rejection_sampler.py index 695ae649..04552b97 100644 --- a/vllm_ascend/sample/rejection_sampler.py +++ b/vllm_ascend/sample/rejection_sampler.py @@ -19,6 +19,7 @@ from vllm_ascend.ops.triton.reject_sample import ( sample_recovered_tokens_kernel, ) from vllm_ascend.sample.sampler import apply_top_k_top_p +from vllm_ascend.utils import vllm_version_is def apply_sampling_constraints( @@ -166,7 +167,10 @@ def rejection_sample( return output_token_ids # Compute probability distribution from target logits. - target_probs = target_logits.softmax(dim=-1, dtype=torch.float32) + if vllm_version_is("0.15.0"): + target_probs = target_logits + else: + target_probs = target_logits.softmax(dim=-1, dtype=torch.float32) assert target_probs.is_contiguous() # Generate uniform probabilities for rejection sampling.