Adjust max prefix len (#980)
This commit is contained in:
@@ -163,11 +163,21 @@ class Req:
|
|||||||
return self.finished_reason is not None
|
return self.finished_reason is not None
|
||||||
|
|
||||||
def adjust_max_prefix_ids(self):
|
def adjust_max_prefix_ids(self):
|
||||||
max_prefix_ids = self.input_ids
|
input_len = len(self.input_ids)
|
||||||
if self.return_logprob:
|
max_prefix_len = input_len
|
||||||
max_prefix_ids = self.input_ids[: self.logprob_start_len]
|
|
||||||
|
|
||||||
return max_prefix_ids
|
if self.sampling_params.max_new_tokens > 0:
|
||||||
|
# Need at least one token to compute logits
|
||||||
|
max_prefix_len = min(max_prefix_len, input_len - 1)
|
||||||
|
|
||||||
|
if self.return_logprob:
|
||||||
|
max_prefix_len = min(max_prefix_len, self.logprob_start_len)
|
||||||
|
|
||||||
|
if self.normalized_prompt_logprob is None:
|
||||||
|
# Need at least two tokens to compute normalized logprob
|
||||||
|
max_prefix_len = min(max_prefix_len, input_len - 2)
|
||||||
|
|
||||||
|
return self.input_ids[:max_prefix_len]
|
||||||
|
|
||||||
# Based on https://github.com/vllm-project/vllm/blob/7a64d24aad69e4d2548aa0bf528d9fe63428ab01/vllm/transformers_utils/detokenizer.py#L194-L313
|
# Based on https://github.com/vllm-project/vllm/blob/7a64d24aad69e4d2548aa0bf528d9fe63428ab01/vllm/transformers_utils/detokenizer.py#L194-L313
|
||||||
def init_incremental_detokenize(self):
|
def init_incremental_detokenize(self):
|
||||||
|
|||||||
@@ -387,23 +387,6 @@ class ModelTpServer:
|
|||||||
|
|
||||||
for req in self.waiting_queue:
|
for req in self.waiting_queue:
|
||||||
|
|
||||||
# FIXME: Move this code into adjust_max_prefix_len
|
|
||||||
if req.return_logprob and req.normalized_prompt_logprob is None:
|
|
||||||
# Need at least two tokens to compute normalized logprob
|
|
||||||
if req.extend_input_len < 2:
|
|
||||||
delta = 2 - req.extend_input_len
|
|
||||||
req.extend_input_len += delta
|
|
||||||
req.prefix_indices = req.prefix_indices[:-delta]
|
|
||||||
if req.image_offset is not None:
|
|
||||||
req.image_offset += delta
|
|
||||||
|
|
||||||
if req.extend_input_len == 0 and req.sampling_params.max_new_tokens > 0:
|
|
||||||
# Need at least one token to compute logits
|
|
||||||
req.extend_input_len = 1
|
|
||||||
req.prefix_indices = req.prefix_indices[:-1]
|
|
||||||
if req.image_offset is not None:
|
|
||||||
req.image_offset += 1
|
|
||||||
|
|
||||||
res = adder.add_one_req(req)
|
res = adder.add_one_req(req)
|
||||||
if (
|
if (
|
||||||
not res
|
not res
|
||||||
|
|||||||
Reference in New Issue
Block a user