Handle truncation errors (#436)

This commit is contained in:
Lianmin Zheng
2024-05-13 15:56:00 -07:00
committed by GitHub
parent 4231a42fa8
commit 5dc55a5f02
10 changed files with 44 additions and 41 deletions

View File

@@ -98,7 +98,6 @@ class TokenizerManager:
self.hf_config = get_config(
self.model_path, trust_remote_code=server_args.trust_remote_code
)
self.context_len = get_context_length(self.hf_config)
if is_multimodal_model(self.model_path):
@@ -156,6 +155,12 @@ class TokenizerManager:
else:
input_ids = obj.input_ids
if len(input_ids) >= self.context_len:
raise ValueError(
f"The input ({len(input_ids)} tokens) is longer than the "
f"model's context length ({self.context_len} tokens)"
)
sampling_params = SamplingParams(**obj.sampling_params)
if sampling_params.max_new_tokens != 0:
sampling_params.normalize(self.tokenizer)