diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index e12d9cdb4..3196e60cb 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -699,6 +699,7 @@ class TokenizerManager: ) else: if completion_tokens >= 2: + # Compute time_per_output_token for the streaming case self.metrics_collector.observe_time_per_output_token( (time.time() - state.first_token_time) / (completion_tokens - 1) @@ -714,7 +715,8 @@ class TokenizerManager: self.metrics_collector.observe_e2e_request_latency( time.time() - state.created_time ) - if completion_tokens >= 1: + # Compute time_per_output_token for the non-streaming case + if not state.obj.stream and completion_tokens >= 1: self.metrics_collector.observe_time_per_output_token( (time.time() - state.created_time) / completion_tokens