Improve docs and warnings (#1164)
This commit is contained in:
@@ -237,7 +237,7 @@ class ModelRunner:
         self.max_total_num_tokens = self.profile_max_num_token(total_gpu_memory)
         if max_total_tokens is not None:
             if max_total_tokens > self.max_total_num_tokens:
-                warnings.warn(
+                logging.warning(
                     f"max_total_tokens={max_total_tokens} is larger than the profiled value "
                     f"{self.max_total_num_tokens}. "
                     f"Use the profiled value instead."
|
||||
Reference in New Issue
Block a user