refactor model loader: initial refactor (#664)

This commit is contained in:
Ying Sheng
2024-07-20 02:18:22 -07:00
committed by GitHub
parent 39c57317e1
commit 06487f126e
6 changed files with 100 additions and 15 deletions

View File

@@ -304,6 +304,12 @@ class ModelTpServer:
self.model_config.context_len - 1 - len(req.origin_input_ids),
self.max_total_num_tokens - 128 - len(req.origin_input_ids),
)
if req.sampling_params.max_new_tokens < 0:
req.origin_input_ids = req.origin_input_ids[
: self.max_total_num_tokens - 128
]
logger.error("Request longer than memory pool size, truncated!!!")
self.forward_queue.append(req)
def get_new_prefill_batch(self) -> Optional[Batch]: