Add metrics for speculative decoding (acceptance rate, average acceptance length) (#11144)

This commit is contained in:
Scott Lee
2025-10-10 00:46:44 -07:00
committed by GitHub
parent f19613e6c3
commit 0babd48736
9 changed files with 74 additions and 1 deletion

View File

@@ -631,6 +631,10 @@ class Req:
# This is used to compute the average acceptance length per request.
self.spec_verify_ct = 0
# The number of accepted tokens in speculative decoding for this request.
# This is used to compute the acceptance rate and average acceptance length per request.
self.spec_accepted_tokens = 0
# For metrics
self.metrics_collector = metrics_collector
self.time_stats: TimeStats = TimeStats(disagg_mode=disagg_mode)