Improve the computation for time_per_output_token Prometheus metrics (#2674)
This commit is contained in:
@@ -699,6 +699,7 @@ class TokenizerManager:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if completion_tokens >= 2:
|
if completion_tokens >= 2:
|
||||||
|
# Compute time_per_output_token for the streaming case
|
||||||
self.metrics_collector.observe_time_per_output_token(
|
self.metrics_collector.observe_time_per_output_token(
|
||||||
(time.time() - state.first_token_time)
|
(time.time() - state.first_token_time)
|
||||||
/ (completion_tokens - 1)
|
/ (completion_tokens - 1)
|
||||||
@@ -714,7 +715,8 @@ class TokenizerManager:
|
|||||||
self.metrics_collector.observe_e2e_request_latency(
|
self.metrics_collector.observe_e2e_request_latency(
|
||||||
time.time() - state.created_time
|
time.time() - state.created_time
|
||||||
)
|
)
|
||||||
if completion_tokens >= 1:
|
# Compute time_per_output_token for the non-streaming case
|
||||||
|
if not state.obj.stream and completion_tokens >= 1:
|
||||||
self.metrics_collector.observe_time_per_output_token(
|
self.metrics_collector.observe_time_per_output_token(
|
||||||
(time.time() - state.created_time)
|
(time.time() - state.created_time)
|
||||||
/ completion_tokens
|
/ completion_tokens
|
||||||
|
|||||||
Reference in New Issue
Block a user