[Frontend] Fix request length check and add option to disallow auto truncation in scheduler (#2876)

This commit is contained in:
Chang Su
2025-01-16 14:51:19 -08:00
committed by GitHub
parent 0427416b59
commit a8ccacc8b8
6 changed files with 154 additions and 17 deletions

View File

@@ -31,6 +31,7 @@ suites = {
"test_pytorch_sampling_backend.py",
"test_radix_attention.py",
"test_release_memory_occupation.py",
"test_request_length_validation.py",
"test_retract_decode.py",
"test_server_args.py",
"test_session_control.py",