[Bugfix] Fix DeepseekV3.1 Accuracy issue (#6805)

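### What this PR does / why we need it?
To adapt to the GLM model, logits were passed in to the sampling step, which can cause accuracy issues in version 0.15.0. This change makes `rejection_sample` use `target_logits` directly as `target_probs` when the vLLM version is 0.15.0, and keeps the float32 softmax over `target_logits` for all other versions.

- vLLM version: v0.15.0
- vLLM main: 83b47f67b1

Signed-off-by: GDzhu01 <809721801@qq.com>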
2026-02-25 23:02:00 +08:00
parent e3927cc8f5
commit aa7fb5d707
1 changed files with 5 additions and 1 deletions
--- a/vllm_ascend/sample/rejection_sampler.py
+++ b/vllm_ascend/sample/rejection_sampler.py
@@ -19,6 +19,7 @@ from vllm_ascend.ops.triton.reject_sample import (
    sample_recovered_tokens_kernel,
 )
 from vllm_ascend.sample.sampler import apply_top_k_top_p
 from vllm_ascend.utils import vllm_version_is
 def apply_sampling_constraints(
@@ -166,7 +167,10 @@ def rejection_sample(
            return output_token_ids
    # Compute probability distribution from target logits.
-    target_probs = target_logits.softmax(dim=-1, dtype=torch.float32)
+    if vllm_version_is("0.15.0"):
        target_probs = target_logits
    else:
        target_probs = target_logits.softmax(dim=-1, dtype=torch.float32)
    assert target_probs.is_contiguous()
    # Generate uniform probabilities for rejection sampling.