[Generative Score API] Scoring(Prefill-only) optimizations. (#9748)
This commit is contained in:
committed by
GitHub
parent
94d0f656fb
commit
a360511d7b
@@ -1261,11 +1261,19 @@ class Scheduler(
|
||||
# Copy more attributes
|
||||
if recv_req.logprob_start_len == -1 or not recv_req.return_logprob:
|
||||
# By default, only return the logprobs for output tokens
|
||||
req.logprob_start_len = len(req.origin_input_ids) - 1
|
||||
# For prefill-only requests on this default path (logprob_start_len == -1 or return_logprob is falsy), set logprob_start_len to len(origin_input_ids), i.e. one past the last input token,
|
||||
# to skip input logprob computation entirely
|
||||
if req.is_prefill_only:
|
||||
req.logprob_start_len = len(req.origin_input_ids)
|
||||
else:
|
||||
# TODO: For text generation, evaluate setting logprob_start_len to len(req.origin_input_ids) as well
|
||||
req.logprob_start_len = len(req.origin_input_ids) - 1
|
||||
else:
|
||||
req.logprob_start_len = recv_req.logprob_start_len
|
||||
|
||||
if req.logprob_start_len >= len(req.origin_input_ids):
|
||||
if not req.is_prefill_only and req.logprob_start_len >= len(
|
||||
req.origin_input_ids
|
||||
):
|
||||
error_msg = f"{req.logprob_start_len=} is higher than the number of input tokens {len(req.origin_input_ids)=}. Please use a smaller logprob_start_len."
|
||||
req.logprob_start_len = len(req.origin_input_ids) - 1
|
||||
req.set_finish_with_abort(error_msg)
|
||||
|
||||
Reference in New Issue
Block a user