Support GC Freezing to improve latency & throughput (#9241)

Co-authored-by: Chanh Nguyen <cnguyen@linkedin.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
2025-08-22 22:43:09 -07:00
parent 7e880286b5
commit 127d4b0d5e
8 changed files with 119 additions and 1 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -123,6 +123,7 @@ class ServerArgs:
    decode_log_interval: int = 40
    enable_request_time_stats_logging: bool = False
    kv_events_config: Optional[str] = None
+    gc_warning_threshold_secs: float = 0.0

    # API related
    api_key: Optional[str] = None
@@ -1172,6 +1173,12 @@ class ServerArgs:
            default=ServerArgs.collect_tokens_histogram,
            help="Collect prompt/generation tokens histogram.",
        )
+        parser.add_argument(
+            "--gc-warning-threshold-secs",
+            type=float,
+            default=ServerArgs.gc_warning_threshold_secs,
+            help="The threshold for long GC warning. If a GC takes longer than this, a warning will be logged. Set to 0 to disable.",
+        )
        parser.add_argument(
            "--decode-log-interval",
            type=int,