[Auto Sync] Update scheduler.py, server_args.py (20251020) (#11875)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Kan Wu <wukanustc@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-10-20 17:41:19 -07:00
committed by GitHub
parent b4948512b8
commit 43ad05907c
2 changed files with 15 additions and 1 deletions

View File

@@ -306,6 +306,9 @@ class Scheduler(
self.dp_size = server_args.dp_size
self.schedule_policy = server_args.schedule_policy
self.enable_priority_scheduling = server_args.enable_priority_scheduling
self.abort_on_priority_when_disabled = (
server_args.abort_on_priority_when_disabled
)
self.schedule_low_priority_values_first = (
server_args.schedule_low_priority_values_first
)
@@ -1560,7 +1563,11 @@ class Scheduler(
req.priority = sys.maxsize
else:
req.priority = -sys.maxsize - 1
elif not self.enable_priority_scheduling and req.priority is not None:
elif (
not self.enable_priority_scheduling
and req.priority is not None
and self.abort_on_priority_when_disabled
):
abort_req = AbortReq(
finished_reason={
"type": "abort",

View File

@@ -220,6 +220,7 @@ class ServerArgs:
max_prefill_tokens: int = 16384
schedule_policy: str = "fcfs"
enable_priority_scheduling: bool = False
abort_on_priority_when_disabled: bool = False
schedule_low_priority_values_first: bool = False
priority_scheduling_preemption_threshold: int = 10
schedule_conservativeness: float = 1.0
@@ -1771,6 +1772,12 @@ class ServerArgs:
default=ServerArgs.enable_priority_scheduling,
help="Enable priority scheduling. Requests with higher priority integer values will be scheduled first by default.",
)
parser.add_argument(
"--abort-on-priority-when-disabled",
action="store_true",
default=ServerArgs.abort_on_priority_when_disabled,
help="If set, abort requests that specify a priority when priority scheduling is disabled.",
)
parser.add_argument(
"--schedule-low-priority-values-first",
action="store_true",