From 2add3dc3e086467e64bf9a41ad2d36117afcd24b Mon Sep 17 00:00:00 2001
From: realliujiaxu <realliujiaxu@163.com>
Date: Sat, 27 Dec 2025 17:04:10 +0800
Subject: [PATCH] [Bugfix] fix greedy temperature detection (#5417)

### What this PR does / why we need it?
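Fix greedy temperature detection in the Ascend rejection sampler: import
`GREEDY_TEMPERATURE` from `vllm.v1.sample.rejection_sampler` instead of
redefining it locally as `-1`, so this module always uses the same value
as upstream vLLM. This follows the upstream change in
https://github.com/vllm-project/vllm/pull/27077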

- vLLM version: release/v0.13.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/81786c87748b0177111dfdc07af5351d8389baa1
---------
Signed-off-by: realliujiaxu <realliujiaxu@163.com>
---
 vllm_ascend/sample/rejection_sampler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/sample/rejection_sampler.py b/vllm_ascend/sample/rejection_sampler.py
index b0e6f848..3361c6f2 100644
--- a/vllm_ascend/sample/rejection_sampler.py
+++ b/vllm_ascend/sample/rejection_sampler.py
@@ -4,12 +4,12 @@ from typing import Optional
 import torch
 from vllm.triton_utils import HAS_TRITON, tl, triton
 from vllm.v1.sample.metadata import SamplingMetadata
-from vllm.v1.sample.rejection_sampler import generate_uniform_probs
+from vllm.v1.sample.rejection_sampler import (GREEDY_TEMPERATURE,
+                                              generate_uniform_probs)
 
 from vllm_ascend.sample.sampler import apply_top_k_top_p
 
 PLACEHOLDER_TOKEN_ID = -1
-GREEDY_TEMPERATURE = -1
 # Maximum number of speculative draft tokens allowed per request in a single
 # step. This value is chosen to be large enough to handle typical use cases.
 MAX_SPEC_LEN = 32