Simplify batch result resolution (#1735)

2024-10-20 19:47:14 -07:00
parent e12358dc91
commit b121bc03a3
5 changed files with 64 additions and 90 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -120,7 +120,7 @@ class ModelRunner:
            )

        if self.is_multimodal_model:
-            logger.info(
+            logger.warning(
                "Automatically turn off --chunked-prefill-size and adjust --mem-fraction-static for multimodal models."
            )
            server_args.chunked_prefill_size = None
@@ -131,13 +131,6 @@ class ModelRunner:
            ]:
                server_args.disable_cuda_graph = True

-        if self.server_args.enable_overlap_schedule:
-            logger.warning(
-                "Overlap scheduler is enabled. This is an experimental feature. "
-                "Sampling penalizer (e.g., frequency and repetition penalty), constrained decoding (e.g., regex, JSON), "
-                "and embedding APIs are not supported and will lead to wrong results."
-            )
-
        # Global vars
        if server_args.show_time_cost:
            enable_show_time_cost()