Return more information for computing the average acceptance length (#3152)

This commit is contained in:
Lianmin Zheng
2025-01-26 04:51:54 -08:00
committed by GitHub
parent 7e0976133c
commit 1dda8c5e4c
10 changed files with 97 additions and 15 deletions

View File

@@ -252,7 +252,6 @@ class Req:
# Sampling info
self.sampling_params = sampling_params
self.lora_path = lora_path
self.custom_logit_processor = custom_logit_processor
# Memory pool info
@@ -300,7 +299,7 @@ class Req:
self.logprob_start_len = 0
self.top_logprobs_num = top_logprobs_num
# Logprobs (return value)
# Logprobs (return values)
self.input_token_logprobs_val: Optional[List[float]] = None
self.input_token_logprobs_idx: Optional[List[int]] = None
self.input_top_logprobs_val: Optional[List[float]] = None
@@ -329,10 +328,15 @@ class Req:
# Constrained decoding
self.grammar: Optional[BaseGrammarObject] = None
# The number of cached tokens, that were already cached in the KV cache
# The number of cached tokens that were already cached in the KV cache
self.cached_tokens = 0
self.already_computed = 0
# The number of verification forward passes in speculative decoding.
# This is used to compute the average acceptance length per request.
self.spec_verify_ct = 0
self.lora_path = lora_path
def extend_image_inputs(self, image_inputs):
if self.image_inputs is None:
self.image_inputs = image_inputs