[Fix] Reduce busy polling when scheduler is idle (#6026)
This commit is contained in:
Committed by: GitHub
Parent: 4b9971e401
Commit: bd7cfbd2f8
@@ -550,6 +550,7 @@ class SchedulerDisaggregationDecodeMixin:
|
||||
# When the server is idle, do self-check and re-init some states
|
||||
self.check_memory()
|
||||
self.new_token_ratio = self.init_new_token_ratio
|
||||
self.maybe_sleep_on_idle()
|
||||
|
||||
self.last_batch = batch
|
||||
|
||||
@@ -628,6 +629,7 @@ class SchedulerDisaggregationDecodeMixin:
|
||||
# When the server is idle, do self-check and re-init some states
|
||||
self.check_memory()
|
||||
self.new_token_ratio = self.init_new_token_ratio
|
||||
self.maybe_sleep_on_idle()
|
||||
|
||||
self.last_batch = batch
|
||||
self.last_batch_in_queue = last_batch_in_queue
|
||||
|
||||
@@ -242,6 +242,7 @@ class SchedulerDisaggregationPrefillMixin:
|
||||
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
|
||||
self.check_memory()
|
||||
self.new_token_ratio = self.init_new_token_ratio
|
||||
self.maybe_sleep_on_idle()
|
||||
|
||||
self.last_batch = batch
|
||||
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
|
||||
@@ -294,6 +295,7 @@ class SchedulerDisaggregationPrefillMixin:
|
||||
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
|
||||
self.check_memory()
|
||||
self.new_token_ratio = self.init_new_token_ratio
|
||||
self.maybe_sleep_on_idle()
|
||||
|
||||
self.last_batch = batch
|
||||
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
|
||||
|
||||
Reference in New Issue
Block a user