Fix accept rate in speculative decoding metrics (#11572)

This commit is contained in:
Qiaolin Yu
2025-10-13 16:35:50 -07:00
committed by GitHub
parent 60b0503227
commit 43f80884c5

View File

@@ -259,7 +259,7 @@ class SchedulerMetricsMixin:
)
# Calculate acceptance rate: accepted tokens / total draft tokens
total_draft_tokens = self.spec_num_total_forward_ct * (
-self.server_args.speculative_num_steps or 1
+(self.server_args.speculative_num_steps or 0) + 1
)
spec_accept_rate = (
self.spec_num_total_accepted_tokens / total_draft_tokens