[Bugfix] fix greedy temperature detection (#5417)
### What this PR does / why we need it?
fix greedy temperature detection from
https://github.com/vllm-project/vllm/pull/27077
- vLLM version: release/v0.13.0
- vLLM main:
81786c8774
---------
Signed-off-by: realliujiaxu <realliujiaxu@163.com>
This commit is contained in:
@@ -4,12 +4,12 @@ from typing import Optional

 import torch

 from vllm.triton_utils import HAS_TRITON, tl, triton
 from vllm.v1.sample.metadata import SamplingMetadata
-from vllm.v1.sample.rejection_sampler import generate_uniform_probs
+from vllm.v1.sample.rejection_sampler import (GREEDY_TEMPERATURE,
+                                              generate_uniform_probs)

 from vllm_ascend.sample.sampler import apply_top_k_top_p

 PLACEHOLDER_TOKEN_ID = -1
-GREEDY_TEMPERATURE = -1
 # Maximum number of speculative draft tokens allowed per request in a single
 # step. This value is chosen to be large enough to handle typical use cases.
 MAX_SPEC_LEN = 32
Reference in New Issue
Block a user