[Fix] Reduce busy polling when scheduler is idle (#6026)

This commit is contained in:
Povilas Kanapickas
2025-06-13 00:58:22 +03:00
committed by GitHub
parent 4b9971e401
commit bd7cfbd2f8
5 changed files with 48 additions and 1 deletion

View File

@@ -550,6 +550,7 @@ class SchedulerDisaggregationDecodeMixin:
# When the server is idle, do self-check and re-init some states
self.check_memory()
self.new_token_ratio = self.init_new_token_ratio
self.maybe_sleep_on_idle()
self.last_batch = batch
@@ -628,6 +629,7 @@ class SchedulerDisaggregationDecodeMixin:
# When the server is idle, do self-check and re-init some states
self.check_memory()
self.new_token_ratio = self.init_new_token_ratio
self.maybe_sleep_on_idle()
self.last_batch = batch
self.last_batch_in_queue = last_batch_in_queue

View File

@@ -242,6 +242,7 @@ class SchedulerDisaggregationPrefillMixin:
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
self.check_memory()
self.new_token_ratio = self.init_new_token_ratio
self.maybe_sleep_on_idle()
self.last_batch = batch
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
@@ -294,6 +295,7 @@ class SchedulerDisaggregationPrefillMixin:
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
self.check_memory()
self.new_token_ratio = self.init_new_token_ratio
self.maybe_sleep_on_idle()
self.last_batch = batch
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it