Fix accept rate in speculative decoding metrics (#11572)
This commit is contained in:
@@ -259,7 +259,7 @@ class SchedulerMetricsMixin:
|
|||||||
)
|
)
|
||||||
# Calculate acceptance rate: accepted tokens / total draft tokens
|
# Calculate acceptance rate: accepted tokens / total draft tokens
|
||||||
total_draft_tokens = self.spec_num_total_forward_ct * (
|
total_draft_tokens = self.spec_num_total_forward_ct * (
|
||||||
self.server_args.speculative_num_steps or 1
|
(self.server_args.speculative_num_steps or 0) + 1
|
||||||
)
|
)
|
||||||
spec_accept_rate = (
|
spec_accept_rate = (
|
||||||
self.spec_num_total_accepted_tokens / total_draft_tokens
|
self.spec_num_total_accepted_tokens / total_draft_tokens
|
||||||
|
|||||||
Reference in New Issue
Block a user