Reduce the overhead when cache is disabled (#1010)

This commit is contained in:
Liangsheng Yin
2024-08-09 16:36:57 -07:00
committed by GitHub
parent 73fa2d49d5
commit 62757db6f0
4 changed files with 35 additions and 43 deletions

View File

@@ -164,7 +164,12 @@ class Req:
def finished(self) -> bool:
    """Return True when a finish reason has been recorded on this request.

    The request counts as finished whenever ``self.finished_reason`` holds
    anything other than ``None``.
    """
    reason = self.finished_reason
    return reason is not None
def init_next_round_input(self):
    """Refresh the per-round input fields of this request.

    ``input_ids`` becomes ``origin_input_ids`` followed by ``output_ids``,
    and ``extend_input_len`` becomes the number of those ids that lie beyond
    ``len(self.prefix_indices)``.
    """
    # NOTE(review): presumably prefix_indices covers the already-cached
    # prefix of the sequence -- confirm against the caller.
    combined_ids = self.origin_input_ids + self.output_ids
    prefix_len = len(self.prefix_indices)

    self.input_ids = combined_ids
    self.extend_input_len = len(combined_ids) - prefix_len
def adjust_max_prefix_ids(self):
self.input_ids = self.origin_input_ids + self.output_ids
input_len = len(self.input_ids)
max_prefix_len = input_len