Remove sampling info events and overlap thread file (#11300)
This commit is contained in:
@@ -2057,15 +2057,11 @@ class ModelRunner:
     def _preprocess_logits(
         self, logits_output: LogitsProcessorOutput, sampling_info: SamplingBatchInfo
     ):
-        # Apply logit bias
-        if sampling_info.sampling_info_done:
-            # Overlap mode: the function update_regex_vocab_mask was executed
-            # in process_batch_result of the last batch.
-            if sampling_info.grammars:
-                sampling_info.sampling_info_done.wait()
-        else:
-            # Normal mode: Put CPU-heavy tasks here. They will be overlapped with the forward pass.
-            sampling_info.update_regex_vocab_mask()
+        # NOTE: In overlap mode, the function update_regex_vocab_mask (in sample)
+        # was executed after we processed last batch's results.
+
+        # Calculate logits bias and apply it to next_token_logits.
+        sampling_info.update_regex_vocab_mask()
         sampling_info.apply_logits_bias(logits_output.next_token_logits)
 
     def sample(
Reference in New Issue
Block a user