[Auto Sync] Update scheduler.py, server_args.py (20251020) (#11875)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Kan Wu <wukanustc@gmail.com>
This commit is contained in:
@@ -306,6 +306,9 @@ class Scheduler(
|
|||||||
self.dp_size = server_args.dp_size
|
self.dp_size = server_args.dp_size
|
||||||
self.schedule_policy = server_args.schedule_policy
|
self.schedule_policy = server_args.schedule_policy
|
||||||
self.enable_priority_scheduling = server_args.enable_priority_scheduling
|
self.enable_priority_scheduling = server_args.enable_priority_scheduling
|
||||||
|
self.abort_on_priority_when_disabled = (
|
||||||
|
server_args.abort_on_priority_when_disabled
|
||||||
|
)
|
||||||
self.schedule_low_priority_values_first = (
|
self.schedule_low_priority_values_first = (
|
||||||
server_args.schedule_low_priority_values_first
|
server_args.schedule_low_priority_values_first
|
||||||
)
|
)
|
||||||
@@ -1560,7 +1563,11 @@ class Scheduler(
|
|||||||
req.priority = sys.maxsize
|
req.priority = sys.maxsize
|
||||||
else:
|
else:
|
||||||
req.priority = -sys.maxsize - 1
|
req.priority = -sys.maxsize - 1
|
||||||
elif not self.enable_priority_scheduling and req.priority is not None:
|
elif (
|
||||||
|
not self.enable_priority_scheduling
|
||||||
|
and req.priority is not None
|
||||||
|
and self.abort_on_priority_when_disabled
|
||||||
|
):
|
||||||
abort_req = AbortReq(
|
abort_req = AbortReq(
|
||||||
finished_reason={
|
finished_reason={
|
||||||
"type": "abort",
|
"type": "abort",
|
||||||
|
|||||||
@@ -220,6 +220,7 @@ class ServerArgs:
|
|||||||
max_prefill_tokens: int = 16384
|
max_prefill_tokens: int = 16384
|
||||||
schedule_policy: str = "fcfs"
|
schedule_policy: str = "fcfs"
|
||||||
enable_priority_scheduling: bool = False
|
enable_priority_scheduling: bool = False
|
||||||
|
abort_on_priority_when_disabled: bool = False
|
||||||
schedule_low_priority_values_first: bool = False
|
schedule_low_priority_values_first: bool = False
|
||||||
priority_scheduling_preemption_threshold: int = 10
|
priority_scheduling_preemption_threshold: int = 10
|
||||||
schedule_conservativeness: float = 1.0
|
schedule_conservativeness: float = 1.0
|
||||||
@@ -1771,6 +1772,12 @@ class ServerArgs:
|
|||||||
default=ServerArgs.enable_priority_scheduling,
|
default=ServerArgs.enable_priority_scheduling,
|
||||||
help="Enable priority scheduling. Requests with higher priority integer values will be scheduled first by default.",
|
help="Enable priority scheduling. Requests with higher priority integer values will be scheduled first by default.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--abort-on-priority-when-disabled",
|
||||||
|
action="store_true",
|
||||||
|
default=ServerArgs.abort_on_priority_when_disabled,
|
||||||
|
help="If set, abort requests that specify a priority when priority scheduling is disabled.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--schedule-low-priority-values-first",
|
"--schedule-low-priority-values-first",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
Reference in New Issue
Block a user