Split the scheduler into multiple mixin classes to reduce the file size (#8483)

This commit is contained in:
Lianmin Zheng
2025-07-29 12:46:50 -07:00
committed by GitHub
parent 5973675bc3
commit a4c3b121d8
12 changed files with 869 additions and 785 deletions

View File

@@ -694,10 +694,7 @@ class SchedulerDisaggregationDecodeMixin:
+ len(self.disagg_decode_prealloc_queue.queue)
== 0
):
-# When the server is idle, do self-check and re-init some states
-self.check_memory()
-self.new_token_ratio = self.init_new_token_ratio
-self.maybe_sleep_on_idle()
+self.self_check_during_idle()
self.last_batch = batch
@@ -771,10 +768,7 @@ class SchedulerDisaggregationDecodeMixin:
+ len(self.disagg_decode_prealloc_queue.queue)
== 0
):
-# When the server is idle, do self-check and re-init some states
-self.check_memory()
-self.new_token_ratio = self.init_new_token_ratio
-self.maybe_sleep_on_idle()
+self.self_check_during_idle()
self.last_batch = batch
self.last_batch_in_queue = last_batch_in_queue

View File

@@ -287,9 +287,7 @@ class SchedulerDisaggregationPrefillMixin:
self.process_disagg_prefill_inflight_queue()
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
-self.check_memory()
-self.new_token_ratio = self.init_new_token_ratio
-self.maybe_sleep_on_idle()
+self.self_check_during_idle()
self.last_batch = batch
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
@@ -337,9 +335,7 @@ class SchedulerDisaggregationPrefillMixin:
self.process_disagg_prefill_inflight_queue()
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
-self.check_memory()
-self.new_token_ratio = self.init_new_token_ratio
-self.maybe_sleep_on_idle()
+self.self_check_during_idle()
self.last_batch = batch
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it