Fix accept rate in speculative decoding metrics (#11572)

This commit is contained in:
Qiaolin Yu
2025-10-13 16:35:50 -07:00
committed by GitHub
parent 60b0503227
commit 43f80884c5

View File

@@ -259,7 +259,7 @@ class SchedulerMetricsMixin:
     )
     # Calculate acceptance rate: accepted tokens / total draft tokens
     total_draft_tokens = self.spec_num_total_forward_ct * (
-        self.server_args.speculative_num_steps or 1
+        (self.server_args.speculative_num_steps or 0) + 1
     )
     spec_accept_rate = (
         self.spec_num_total_accepted_tokens / total_draft_tokens