feat: throttle requests at scheduler based on --max_queued_requests (#7565)

This commit is contained in:
harrisonlimh
2025-07-28 07:32:33 -07:00
committed by GitHub
parent b582159246
commit 747dd45077
10 changed files with 218 additions and 6 deletions

View File

@@ -86,6 +86,7 @@ suites = {
TestFile("test_radix_attention.py", 105),
TestFile("test_regex_constrained.py", 64),
TestFile("test_retract_decode.py", 54),
TestFile("test_request_queue_validation.py", 30),
TestFile("test_server_args.py", 1),
TestFile("test_skip_tokenizer_init.py", 117),
TestFile("test_srt_engine.py", 261),