Returning a per-request metric for the number of cached_tokens read (#1599)
This commit is contained in:
@@ -978,6 +978,7 @@ class Scheduler:
|
||||
"prompt_tokens": len(req.origin_input_ids),
|
||||
"completion_tokens": len(req.output_ids),
|
||||
"completion_tokens_wo_jump_forward": req.completion_tokens_wo_jump_forward,
|
||||
"cached_tokens": req.cached_tokens,
|
||||
"finish_reason": (
|
||||
req.finished_reason.to_json()
|
||||
if req.finished_reason is not None
|
||||
|
||||
Reference in New Issue
Block a user