[router] Fix short timeout for the prefill client (#9803)

This commit is contained in:
LukasBluebaum
2025-09-02 04:57:04 +02:00
committed by GitHub
parent 58d06fdc95
commit 9d9fa9a537
4 changed files with 66 additions and 45 deletions

View File

@@ -305,10 +305,10 @@ jobs:
# Set mean thresholds (allowing for reasonable variance)
# These can be adjusted based on your performance requirements
ttft_threshold=2.0 # Max 2.0 seconds for mean TTFT
e2e_latency_threshold=24.0 # Max 24.0 seconds for mean E2E latency
input_throughput_threshold=10000 # Min 10000 tokens/s for mean input throughput
output_throughput_threshold=90 # Min 90 tokens/s for mean output throughput
ttft_threshold=4.7 # Max 4.7 seconds for mean TTFT
e2e_latency_threshold=35.0 # Max 35.0 seconds for mean E2E latency
input_throughput_threshold=12000 # Min 12000 tokens/s for mean input throughput
output_throughput_threshold=68 # Min 68 tokens/s for mean output throughput
# Validate mean thresholds