Add metrics for speculative decoding (acceptance rate, average acceptance length) (#11441)

This commit is contained in:
Scott Lee
2025-10-13 11:24:27 -07:00
committed by GitHub
parent f4aa78801e
commit b6fb5d7666
9 changed files with 70 additions and 1 deletion

View File

@@ -608,6 +608,10 @@ class Req:
# This is used to compute the average acceptance length per request.
self.spec_verify_ct = 0
# The number of accepted tokens in speculative decoding for this request.
# This is used to compute the acceptance rate and average acceptance length per request.
self.spec_accepted_tokens = 0
# For metrics
self.metrics_collector = metrics_collector
self.time_stats: TimeStats = TimeStats(disagg_mode=disagg_mode)