Support penalty in overlap mode; return logprob with chunked prefill; improve benchmark scripts (#3988)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
Co-authored-by: dhou-xai <dhou@x.ai>
Co-authored-by: Hanming Lu <hanming_lu@berkeley.edu>
This commit is contained in:
@@ -12,7 +12,6 @@ suites = {
|
||||
"models/test_generation_models.py",
|
||||
"models/test_qwen_models.py",
|
||||
"models/test_reward_models.py",
|
||||
"sampling/penaltylib",
|
||||
"test_abort.py",
|
||||
"test_chunked_prefill.py",
|
||||
"test_custom_allreduce.py",
|
||||
@@ -31,6 +30,7 @@ suites = {
|
||||
"test_no_chunked_prefill.py",
|
||||
"test_no_overlap_scheduler.py",
|
||||
"test_openai_server.py",
|
||||
"test_penalty.py",
|
||||
"test_pytorch_sampling_backend.py",
|
||||
"test_radix_attention.py",
|
||||
"test_regex_constrained.py",
|
||||
@@ -38,7 +38,8 @@ suites = {
|
||||
"test_request_length_validation.py",
|
||||
"test_retract_decode.py",
|
||||
"test_server_args.py",
|
||||
"test_session_control.py",
|
||||
# Disabled temporarily
|
||||
# "test_session_control.py",
|
||||
"test_skip_tokenizer_init.py",
|
||||
"test_srt_engine.py",
|
||||
"test_srt_endpoint.py",
|
||||
@@ -64,9 +65,6 @@ suites = {
|
||||
# Disable temporarily
|
||||
# "test_nightly_math_eval.py",
|
||||
],
|
||||
"sampling/penaltylib": glob.glob(
|
||||
"sampling/penaltylib/**/test_*.py", recursive=True
|
||||
),
|
||||
}
|
||||
|
||||
# Expand suite
|
||||
@@ -83,7 +81,7 @@ if __name__ == "__main__":
|
||||
arg_parser.add_argument(
|
||||
"--timeout-per-file",
|
||||
type=int,
|
||||
default=2000,
|
||||
default=1800,
|
||||
help="The time limit for running one file in seconds.",
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user