feat: add priority-based scheduling with priority-based request acceptance and preemption (#8746)

This commit is contained in:
harrisonlimh
2025-09-16 17:10:10 -07:00
committed by GitHub
parent f949ad5794
commit 14fdd52740
16 changed files with 822 additions and 71 deletions

View File

@@ -95,6 +95,7 @@ suites = {
TestFile("test_original_logprobs.py", 200),
TestFile("test_penalty.py", 41),
TestFile("test_page_size.py", 60),
TestFile("test_priority_scheduling.py", 100),
TestFile("test_pytorch_sampling_backend.py", 66),
TestFile("test_radix_attention.py", 105),
TestFile("test_regex_constrained.py", 64),