From dd650e0e21bbe07d131dd861aa475b0b9fc89ead Mon Sep 17 00:00:00 2001 From: Zilin Zhu Date: Fri, 8 Aug 2025 19:34:38 +0800 Subject: [PATCH] [RL] fix skip_server_warmup and rl health_generate logic (#8757) --- python/sglang/srt/entrypoints/http_server.py | 2 ++ python/sglang/srt/managers/scheduler.py | 5 ++++- .../sglang/srt/managers/scheduler_update_weights_mixin.py | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py index c4d36088f..a6bcb0b5b 100644 --- a/python/sglang/srt/entrypoints/http_server.py +++ b/python/sglang/srt/entrypoints/http_server.py @@ -1172,6 +1172,8 @@ def _wait_and_warmup( pipe_finish_writer, ): return + else: + _global_state.tokenizer_manager.server_status = ServerStatus.Up logger.info("The server is fired up and ready to roll!") diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index a97cca261..5d2204c3f 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -473,6 +473,7 @@ class Scheduler( self.memory_saver_adapter = TorchMemorySaverAdapter.create( enable=server_args.enable_memory_saver ) + self.offload_tags = set() self.init_profier() self.recv_skipper = SchedulerRecvSkipper.maybe_create(server_args) @@ -1040,7 +1041,9 @@ class Scheduler( for recv_req in recv_reqs: # If it is a health check generation request and there are running requests, ignore it. if is_health_check_generate_req(recv_req) and ( - self.chunked_req is not None or not self.running_batch.is_empty() + self.chunked_req is not None + or not self.running_batch.is_empty() + or len(self.offload_tags) > 0 ): self.return_health_check_ct += 1 continue diff --git a/python/sglang/srt/managers/scheduler_update_weights_mixin.py b/python/sglang/srt/managers/scheduler_update_weights_mixin.py index eba92a2e0..8da3d07be 100644 --- a/python/sglang/srt/managers/scheduler_update_weights_mixin.py +++ b/python/sglang/srt/managers/scheduler_update_weights_mixin.py @@ -78,6 +78,9 @@ class SchedulerUpdateWeightsMixin: if tags is None or len(tags) == 0: tags = [GPU_MEMORY_TYPE_WEIGHTS, GPU_MEMORY_TYPE_KV_CACHE] + for tag in tags: + self.offload_tags.add(tag) + if GPU_MEMORY_TYPE_KV_CACHE in tags: self.memory_saver_adapter.pause(GPU_MEMORY_TYPE_KV_CACHE) self.flush_cache() @@ -97,6 +100,9 @@ class SchedulerUpdateWeightsMixin: if tags is None or len(tags) == 0: tags = [GPU_MEMORY_TYPE_WEIGHTS, GPU_MEMORY_TYPE_KV_CACHE] + for tag in tags: + self.offload_tags.remove(tag) + if GPU_MEMORY_TYPE_WEIGHTS in tags: self.memory_saver_adapter.resume(GPU_MEMORY_TYPE_WEIGHTS) torch.distributed.barrier(self.tp_cpu_group)