Return a per-request metric for the number of cached_tokens read (#1599)

This commit is contained in:
havetc
2024-10-16 20:49:22 +02:00
committed by GitHub
parent dbec2f1847
commit ecb8bad276
7 changed files with 245 additions and 3 deletions

View File

@@ -978,6 +978,7 @@ class Scheduler:
"prompt_tokens": len(req.origin_input_ids),
"completion_tokens": len(req.output_ids),
"completion_tokens_wo_jump_forward": req.completion_tokens_wo_jump_forward,
"cached_tokens": req.cached_tokens,
"finish_reason": (
req.finished_reason.to_json()
if req.finished_reason is not None