diff --git a/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-DeepSeek-R1-0528-W8A8.yaml b/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-DeepSeek-R1-0528-W8A8.yaml
index ddfbcab6..7629a7ff 100644
--- a/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-DeepSeek-R1-0528-W8A8.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-DeepSeek-R1-0528-W8A8.yaml
@@ -42,7 +42,7 @@ test_cases:
       - metric: "TTFT"
         baseline: "prefix0"
         target: "prefix75"
-        ratio: 0.8
+        ratio: 0.4
         operator: "<"
     benchmarks:
       warm_up:
@@ -51,7 +51,7 @@ test_cases:
         request_conf: vllm_api_stream_chat
         dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
         num_prompts: 210
-        max_out_len: 2
+        max_out_len: 1
         batch_size: 1000
         baseline: 0
         threshold: 0.97
@@ -61,7 +61,7 @@ test_cases:
         request_conf: vllm_api_stream_chat
         dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
         num_prompts: 210
-        max_out_len: 1500
+        max_out_len: 1
         batch_size: 18
         baseline: 1
         threshold: 0.97
@@ -71,7 +71,7 @@ test_cases:
         request_conf: vllm_api_stream_chat
         dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
         num_prompts: 210
-        max_out_len: 1500
+        max_out_len: 1
         batch_size: 18
         baseline: 1
         threshold: 0.97
diff --git a/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-Qwen3-32B-Int8.yaml b/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-Qwen3-32B-Int8.yaml
index 6ead3525..d57368dd 100644
--- a/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-Qwen3-32B-Int8.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/Prefix-Cache-Qwen3-32B-Int8.yaml
@@ -35,7 +35,7 @@ test_cases:
       - metric: "TTFT"
         baseline: "prefix0"
         target: "prefix75"
-        ratio: 0.8
+        ratio: 0.4
         operator: "<"
     benchmarks:
       warm_up:
@@ -44,7 +44,7 @@ test_cases:
         request_conf: vllm_api_stream_chat
         dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
         num_prompts: 210
-        max_out_len: 2
+        max_out_len: 1
         batch_size: 1000
         baseline: 0
         threshold: 0.97
@@ -54,7 +54,7 @@ test_cases:
         request_conf: vllm_api_stream_chat
         dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
         num_prompts: 210
-        max_out_len: 1500
+        max_out_len: 1
         batch_size: 48
         baseline: 1
         threshold: 0.97
@@ -64,7 +64,7 @@ test_cases:
         request_conf: vllm_api_stream_chat
         dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
         num_prompts: 210
-        max_out_len: 1500
+        max_out_len: 1
         batch_size: 48
         baseline: 1
         threshold: 0.97