Fix cache hit rate when chunked prefill is used (#2555)

This commit is contained in:
Liangsheng Yin
2024-12-26 03:14:28 -08:00
committed by GitHub
parent 9a23c48456
commit e7ebecf82e
2 changed files with 8 additions and 11 deletions

View File

@@ -248,7 +248,7 @@ class PrefillAdder:
self.can_run_list.append(req)
self._prefill_one_req(
len(req.prefix_indices),
0,
req.extend_input_len,
(
min(req.sampling_params.max_new_tokens, CLIP_MAX_NEW_TOKENS_ESTIMATION)