Simplify batch result resolution (#1735)

This commit is contained in:
Lianmin Zheng
2024-10-20 19:47:14 -07:00
committed by GitHub
parent e12358dc91
commit b121bc03a3
5 changed files with 64 additions and 90 deletions

View File

@@ -120,7 +120,7 @@ class ModelRunner:
)
if self.is_multimodal_model:
logger.info(
logger.warning(
"Automatically turn off --chunked-prefill-size and adjust --mem-fraction-static for multimodal models."
)
server_args.chunked_prefill_size = None
@@ -131,13 +131,6 @@ class ModelRunner:
]:
server_args.disable_cuda_graph = True
if self.server_args.enable_overlap_schedule:
logger.warning(
"Overlap scheduler is enabled. This is an experimental feature. "
"Sampling penalizer (e.g., frequency and repetition penalty), constrained decoding (e.g., regex, JSON), "
"and embedding APIs are not supported and will lead to wrong results."
)
# Global vars
if server_args.show_time_cost:
enable_show_time_cost()