Revert "Add metrics for speculative decoding (acceptance rate, average acceptance length)" (#11433)

This commit is contained in:
Scott Lee
2025-10-10 12:54:57 -07:00
committed by GitHub
parent b4408e6098
commit 55b14656e6
9 changed files with 1 addition and 74 deletions

View File

@@ -631,10 +631,6 @@ class Req:
# This is used to compute the average acceptance length per request.
self.spec_verify_ct = 0
# The number of accepted tokens in speculative decoding for this request.
# This is used to compute the acceptance rate and average acceptance length per request.
self.spec_accepted_tokens = 0
# For metrics
self.metrics_collector = metrics_collector
self.time_stats: TimeStats = TimeStats(disagg_mode=disagg_mode)