Improve docs and warnings (#1164)

This commit is contained in:
Lianmin Zheng
2024-08-20 08:31:29 -07:00
committed by GitHub
parent d8476818ef
commit a8ae640328
7 changed files with 25 additions and 24 deletions

View File

@@ -237,7 +237,7 @@ class ModelRunner:
self.max_total_num_tokens = self.profile_max_num_token(total_gpu_memory)
if max_total_tokens is not None:
if max_total_tokens > self.max_total_num_tokens:
-warnings.warn(
+logging.warning(
f"max_total_tokens={max_total_tokens} is larger than the profiled value "
f"{self.max_total_num_tokens}. "
f"Use the profiled value instead."