{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/max_terminated_length": 386.0, "completions/mean_length": 129.8984375, "completions/mean_terminated_length": 129.8984375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.0032, "grad_norm": 0.5696932673454285, "learning_rate": 0.0, "loss": 0.0045, "num_tokens": 682196.0, "reward": 0.8896484375, "reward_std": 0.2799686789512634, "rewards/accuracy_reward_conf_tag": 0.4140625, "rewards/format_reward_conf_tag": 0.951171875, "step": 1 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/max_terminated_length": 466.0, "completions/mean_length": 135.37890625, "completions/mean_terminated_length": 135.37890625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.0064, "grad_norm": 0.7087669968605042, "learning_rate": 3.125e-08, "loss": -0.0005, "num_tokens": 1388414.0, "reward": 0.876953125, "reward_std": 0.2851727604866028, "rewards/accuracy_reward_conf_tag": 0.41015625, "rewards/format_reward_conf_tag": 0.93359375, "step": 2 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/max_terminated_length": 400.0, "completions/mean_length": 132.919921875, "completions/mean_terminated_length": 132.919921875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.0096, "grad_norm": 0.6015397906303406, "learning_rate": 6.25e-08, "loss": 0.0152, "num_tokens": 2114917.0, "reward": 0.91796875, "reward_std": 0.2880901098251343, "rewards/accuracy_reward_conf_tag": 0.44140625, "rewards/format_reward_conf_tag": 0.953125, "step": 3 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/max_terminated_length": 321.0, "completions/mean_length": 130.029296875, "completions/mean_terminated_length": 130.029296875, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.0128, "grad_norm": 0.614819347858429, "learning_rate": 9.375e-08, "loss": 0.0048, "num_tokens": 2800780.0, "reward": 0.93359375, "reward_std": 0.3034687042236328, "rewards/accuracy_reward_conf_tag": 0.46484375, "rewards/format_reward_conf_tag": 0.9375, "step": 4 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/max_terminated_length": 478.0, "completions/mean_length": 148.236328125, "completions/mean_terminated_length": 148.236328125, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.016, "grad_norm": 0.4650380313396454, "learning_rate": 1.25e-07, "loss": 0.0037, "num_tokens": 3524981.0, "reward": 0.7470703125, "reward_std": 0.2420598566532135, "rewards/accuracy_reward_conf_tag": 0.28125, "rewards/format_reward_conf_tag": 0.931640625, "step": 5 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/max_terminated_length": 408.0, "completions/mean_length": 138.994140625, "completions/mean_terminated_length": 138.994140625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.0192, "grad_norm": 0.5227891206741333, "learning_rate": 1.5624999999999999e-07, "loss": -0.0016, "num_tokens": 4245562.0, "reward": 0.912109375, "reward_std": 0.29249250888824463, "rewards/accuracy_reward_conf_tag": 0.435546875, "rewards/format_reward_conf_tag": 0.953125, "step": 6 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/max_terminated_length": 480.0, "completions/mean_length": 137.193359375, "completions/mean_terminated_length": 137.193359375, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.0224, "grad_norm": 0.4919546842575073, "learning_rate": 1.875e-07, "loss": -0.0021, "num_tokens": 4975437.0, "reward": 0.91015625, "reward_std": 0.2555137276649475, "rewards/accuracy_reward_conf_tag": 0.431640625, "rewards/format_reward_conf_tag": 0.95703125, "step": 7 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 134.935546875, "completions/mean_terminated_length": 134.935546875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.0256, "grad_norm": 0.5210925936698914, "learning_rate": 2.1875e-07, "loss": 0.0074, "num_tokens": 5710548.0, "reward": 0.8974609375, "reward_std": 0.2795425057411194, "rewards/accuracy_reward_conf_tag": 0.416015625, "rewards/format_reward_conf_tag": 0.962890625, "step": 8 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 745.0, "completions/max_terminated_length": 745.0, "completions/mean_length": 144.29296875, "completions/mean_terminated_length": 144.29296875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.0288, "grad_norm": 0.4255554676055908, "learning_rate": 2.5e-07, "loss": 0.0074, "num_tokens": 6401338.0, "reward": 0.826171875, "reward_std": 0.24940608441829681, "rewards/accuracy_reward_conf_tag": 0.34765625, "rewards/format_reward_conf_tag": 0.95703125, "step": 9 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 136.876953125, "completions/mean_terminated_length": 136.876953125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.032, "grad_norm": 0.5149644017219543, "learning_rate": 2.8125e-07, "loss": 0.0045, "num_tokens": 7122283.0, "reward": 0.8369140625, "reward_std": 0.2635266184806824, "rewards/accuracy_reward_conf_tag": 0.357421875, "rewards/format_reward_conf_tag": 0.958984375, "step": 10 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 147.564453125, "completions/mean_terminated_length": 147.564453125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0352, "grad_norm": 0.3749123215675354, "learning_rate": 3.1249999999999997e-07, "loss": 0.0009, "num_tokens": 7844076.0, "reward": 0.7958984375, "reward_std": 0.18583612143993378, "rewards/accuracy_reward_conf_tag": 0.314453125, "rewards/format_reward_conf_tag": 0.962890625, "step": 11 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 131.255859375, "completions/mean_terminated_length": 131.255859375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0384, "grad_norm": 0.43781086802482605, "learning_rate": 3.4375e-07, "loss": 0.0067, "num_tokens": 8555231.0, "reward": 0.88671875, "reward_std": 0.21360914409160614, "rewards/accuracy_reward_conf_tag": 0.40234375, "rewards/format_reward_conf_tag": 0.96875, "step": 12 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/max_terminated_length": 365.0, "completions/mean_length": 144.42578125, "completions/mean_terminated_length": 144.42578125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0416, "grad_norm": 0.43813973665237427, "learning_rate": 3.75e-07, "loss": 0.0025, "num_tokens": 9294841.0, "reward": 0.826171875, "reward_std": 0.22373488545417786, "rewards/accuracy_reward_conf_tag": 0.3515625, "rewards/format_reward_conf_tag": 0.94921875, "step": 13 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/max_terminated_length": 435.0, "completions/mean_length": 146.2109375, "completions/mean_terminated_length": 146.2109375, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.0448, "grad_norm": 0.39122122526168823, "learning_rate": 4.0625e-07, "loss": 0.0013, "num_tokens": 10000757.0, "reward": 0.865234375, "reward_std": 0.1890736222267151, "rewards/accuracy_reward_conf_tag": 0.380859375, "rewards/format_reward_conf_tag": 0.96875, "step": 14 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 812.0, "completions/max_terminated_length": 812.0, "completions/mean_length": 139.2890625, "completions/mean_terminated_length": 139.2890625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.048, "grad_norm": 0.4133016765117645, "learning_rate": 4.375e-07, "loss": 0.0106, "num_tokens": 10714945.0, "reward": 0.89453125, "reward_std": 0.20083385705947876, "rewards/accuracy_reward_conf_tag": 0.408203125, "rewards/format_reward_conf_tag": 0.97265625, "step": 15 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 128.763671875, "completions/mean_terminated_length": 128.763671875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0512, "grad_norm": 0.45241987705230713, "learning_rate": 4.6874999999999996e-07, "loss": 0.0021, "num_tokens": 11406400.0, "reward": 0.87109375, "reward_std": 0.22898858785629272, "rewards/accuracy_reward_conf_tag": 0.37890625, "rewards/format_reward_conf_tag": 0.984375, "step": 16 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 529.0, "completions/max_terminated_length": 529.0, "completions/mean_length": 135.091796875, "completions/mean_terminated_length": 135.091796875, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.0544, "grad_norm": 0.43074896931648254, "learning_rate": 5e-07, "loss": 0.003, "num_tokens": 12121991.0, "reward": 0.8603515625, "reward_std": 0.20363333821296692, "rewards/accuracy_reward_conf_tag": 0.376953125, "rewards/format_reward_conf_tag": 0.966796875, "step": 17 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 133.248046875, "completions/mean_terminated_length": 133.248046875, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.0576, "grad_norm": 0.5622017979621887, "learning_rate": 5.3125e-07, "loss": 0.01, "num_tokens": 12832646.0, "reward": 1.0009765625, "reward_std": 0.2823396921157837, "rewards/accuracy_reward_conf_tag": 0.51171875, "rewards/format_reward_conf_tag": 0.978515625, "step": 18 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 595.0, "completions/max_terminated_length": 595.0, "completions/mean_length": 137.880859375, "completions/mean_terminated_length": 137.880859375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.0608, "grad_norm": 0.42013996839523315, "learning_rate": 5.625e-07, "loss": 0.0064, "num_tokens": 13554545.0, "reward": 0.9013671875, "reward_std": 0.1799347698688507, "rewards/accuracy_reward_conf_tag": 0.412109375, "rewards/format_reward_conf_tag": 0.978515625, "step": 19 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 143.6171875, "completions/mean_terminated_length": 143.6171875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.064, "grad_norm": 0.38607338070869446, "learning_rate": 5.937499999999999e-07, "loss": 0.0026, "num_tokens": 14259629.0, "reward": 0.85546875, "reward_std": 0.1840585172176361, "rewards/accuracy_reward_conf_tag": 0.361328125, "rewards/format_reward_conf_tag": 0.98828125, "step": 20 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 130.44921875, "completions/mean_terminated_length": 130.44921875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.0672, "grad_norm": 0.43252310156822205, "learning_rate": 6.249999999999999e-07, "loss": 0.0048, "num_tokens": 14972307.0, "reward": 0.92578125, "reward_std": 0.18132153153419495, "rewards/accuracy_reward_conf_tag": 0.435546875, "rewards/format_reward_conf_tag": 0.98046875, "step": 21 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/max_terminated_length": 409.0, "completions/mean_length": 129.318359375, "completions/mean_terminated_length": 129.318359375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.0704, "grad_norm": 0.4275171756744385, "learning_rate": 6.5625e-07, "loss": 0.0071, "num_tokens": 15702902.0, "reward": 0.890625, "reward_std": 0.16756784915924072, "rewards/accuracy_reward_conf_tag": 0.396484375, "rewards/format_reward_conf_tag": 0.98828125, "step": 22 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.0, "completions/max_terminated_length": 379.0, "completions/mean_length": 123.634765625, "completions/mean_terminated_length": 123.634765625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.0736, "grad_norm": 0.41762423515319824, "learning_rate": 6.875e-07, "loss": 0.0014, "num_tokens": 16385827.0, "reward": 0.9150390625, "reward_std": 0.15485742688179016, "rewards/accuracy_reward_conf_tag": 0.421875, "rewards/format_reward_conf_tag": 0.986328125, "step": 23 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 630.0, "completions/max_terminated_length": 630.0, "completions/mean_length": 122.8671875, "completions/mean_terminated_length": 122.8671875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.0768, "grad_norm": 0.32948175072669983, "learning_rate": 7.1875e-07, "loss": 0.0043, "num_tokens": 17064855.0, "reward": 0.9697265625, "reward_std": 0.12152266502380371, "rewards/accuracy_reward_conf_tag": 0.470703125, "rewards/format_reward_conf_tag": 0.998046875, "step": 24 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 657.0, "completions/max_terminated_length": 657.0, "completions/mean_length": 119.166015625, "completions/mean_terminated_length": 119.166015625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.08, "grad_norm": 0.4611853361129761, "learning_rate": 7.5e-07, "loss": 0.003, "num_tokens": 17780772.0, "reward": 0.96875, "reward_std": 0.2091064751148224, "rewards/accuracy_reward_conf_tag": 0.47265625, "rewards/format_reward_conf_tag": 0.9921875, "step": 25 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/max_terminated_length": 449.0, "completions/mean_length": 120.908203125, "completions/mean_terminated_length": 120.908203125, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.0832, "grad_norm": 0.3272891640663147, "learning_rate": 7.812499999999999e-07, "loss": 0.0073, "num_tokens": 18493589.0, "reward": 0.986328125, "reward_std": 0.12441777437925339, "rewards/accuracy_reward_conf_tag": 0.490234375, "rewards/format_reward_conf_tag": 0.9921875, "step": 26 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 118.529296875, "completions/mean_terminated_length": 118.529296875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.0864, "grad_norm": 0.3120933175086975, "learning_rate": 8.125e-07, "loss": -0.001, "num_tokens": 19191684.0, "reward": 0.8515625, "reward_std": 0.11107683926820755, "rewards/accuracy_reward_conf_tag": 0.353515625, "rewards/format_reward_conf_tag": 0.99609375, "step": 27 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/max_terminated_length": 402.0, "completions/mean_length": 119.529296875, "completions/mean_terminated_length": 119.529296875, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.0896, "grad_norm": 0.4711366593837738, "learning_rate": 8.4375e-07, "loss": 0.0021, "num_tokens": 19916331.0, "reward": 0.9736328125, "reward_std": 0.1592046320438385, "rewards/accuracy_reward_conf_tag": 0.478515625, "rewards/format_reward_conf_tag": 0.990234375, "step": 28 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 589.0, "completions/max_terminated_length": 589.0, "completions/mean_length": 119.876953125, "completions/mean_terminated_length": 119.876953125, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.0928, "grad_norm": 0.2972983121871948, "learning_rate": 8.75e-07, "loss": 0.0002, "num_tokens": 20612092.0, "reward": 0.9306640625, "reward_std": 0.10890618711709976, "rewards/accuracy_reward_conf_tag": 0.43359375, "rewards/format_reward_conf_tag": 0.994140625, "step": 29 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 121.0, "completions/mean_terminated_length": 121.0, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.096, "grad_norm": 0.4076586067676544, "learning_rate": 9.0625e-07, "loss": -0.0047, "num_tokens": 21316700.0, "reward": 0.869140625, "reward_std": 0.16637209057807922, "rewards/accuracy_reward_conf_tag": 0.37109375, "rewards/format_reward_conf_tag": 0.99609375, "step": 30 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/max_terminated_length": 437.0, "completions/mean_length": 113.517578125, "completions/mean_terminated_length": 113.517578125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.0992, "grad_norm": 0.5173690915107727, "learning_rate": 9.374999999999999e-07, "loss": 0.0033, "num_tokens": 22020325.0, "reward": 1.0048828125, "reward_std": 0.1903141289949417, "rewards/accuracy_reward_conf_tag": 0.509765625, "rewards/format_reward_conf_tag": 0.990234375, "step": 31 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 358.0, "completions/max_terminated_length": 358.0, "completions/mean_length": 121.724609375, "completions/mean_terminated_length": 121.724609375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.1024, "grad_norm": 0.43424108624458313, "learning_rate": 9.6875e-07, "loss": -0.0056, "num_tokens": 22742224.0, "reward": 0.9140625, "reward_std": 0.1641375869512558, "rewards/accuracy_reward_conf_tag": 0.416015625, "rewards/format_reward_conf_tag": 0.99609375, "step": 32 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 349.0, "completions/max_terminated_length": 349.0, "completions/mean_length": 110.896484375, "completions/mean_terminated_length": 111.1135025024414, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1056, "grad_norm": 0.4005340337753296, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 23456827.0, "reward": 0.908203125, "reward_std": 0.14249923825263977, "rewards/accuracy_reward_conf_tag": 0.41015625, "rewards/format_reward_conf_tag": 0.99609375, "step": 33 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.0, "completions/max_terminated_length": 336.0, "completions/mean_length": 113.3359375, "completions/mean_terminated_length": 113.3359375, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.1088, "grad_norm": 0.37346094846725464, "learning_rate": 9.9644128113879e-07, "loss": 0.0002, "num_tokens": 24170503.0, "reward": 0.990234375, "reward_std": 0.1363212764263153, "rewards/accuracy_reward_conf_tag": 0.490234375, "rewards/format_reward_conf_tag": 1.0, "step": 34 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 112.53515625, "completions/mean_terminated_length": 112.53515625, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.112, "grad_norm": 0.40560030937194824, "learning_rate": 9.9288256227758e-07, "loss": 0.0034, "num_tokens": 24854665.0, "reward": 1.0517578125, "reward_std": 0.15736976265907288, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 0.998046875, "step": 35 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/max_terminated_length": 342.0, "completions/mean_length": 109.083984375, "completions/mean_terminated_length": 109.083984375, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.1152, "grad_norm": 0.37966471910476685, "learning_rate": 9.8932384341637e-07, "loss": 0.0025, "num_tokens": 25537916.0, "reward": 0.98828125, "reward_std": 0.11993881314992905, "rewards/accuracy_reward_conf_tag": 0.48828125, "rewards/format_reward_conf_tag": 1.0, "step": 36 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 384.0, "completions/max_terminated_length": 384.0, "completions/mean_length": 114.1640625, "completions/mean_terminated_length": 114.1640625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.1184, "grad_norm": 0.49092215299606323, "learning_rate": 9.8576512455516e-07, "loss": 0.0001, "num_tokens": 26229336.0, "reward": 0.87890625, "reward_std": 0.1713615357875824, "rewards/accuracy_reward_conf_tag": 0.380859375, "rewards/format_reward_conf_tag": 0.99609375, "step": 37 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/max_terminated_length": 342.0, "completions/mean_length": 111.259765625, "completions/mean_terminated_length": 111.259765625, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.1216, "grad_norm": 0.3609483242034912, "learning_rate": 9.8220640569395e-07, "loss": -0.004, "num_tokens": 26935821.0, "reward": 0.96484375, "reward_std": 0.12743577361106873, "rewards/accuracy_reward_conf_tag": 0.46484375, "rewards/format_reward_conf_tag": 1.0, "step": 38 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/max_terminated_length": 539.0, "completions/mean_length": 108.09765625, "completions/mean_terminated_length": 108.09765625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.1248, "grad_norm": 0.37006741762161255, "learning_rate": 9.786476868327401e-07, "loss": -0.0013, "num_tokens": 27632351.0, "reward": 0.9599609375, "reward_std": 0.14255574345588684, "rewards/accuracy_reward_conf_tag": 0.4609375, "rewards/format_reward_conf_tag": 0.998046875, "step": 39 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.0, "completions/max_terminated_length": 326.0, "completions/mean_length": 111.908203125, "completions/mean_terminated_length": 111.908203125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.128, "grad_norm": 0.33325719833374023, "learning_rate": 9.750889679715302e-07, "loss": -0.002, "num_tokens": 28335424.0, "reward": 0.892578125, "reward_std": 0.08929628133773804, "rewards/accuracy_reward_conf_tag": 0.392578125, "rewards/format_reward_conf_tag": 1.0, "step": 40 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.0, "completions/max_terminated_length": 416.0, "completions/mean_length": 105.275390625, "completions/mean_terminated_length": 105.275390625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.1312, "grad_norm": 0.38754379749298096, "learning_rate": 9.715302491103202e-07, "loss": -0.001, "num_tokens": 29036165.0, "reward": 0.9609375, "reward_std": 0.12428481131792068, "rewards/accuracy_reward_conf_tag": 0.4609375, "rewards/format_reward_conf_tag": 1.0, "step": 41 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.0, "completions/max_terminated_length": 301.0, "completions/mean_length": 103.38671875, "completions/mean_terminated_length": 103.38671875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.1344, "grad_norm": 0.5092437267303467, "learning_rate": 9.679715302491102e-07, "loss": 0.0012, "num_tokens": 29740691.0, "reward": 1.0556640625, "reward_std": 0.12526994943618774, "rewards/accuracy_reward_conf_tag": 0.556640625, "rewards/format_reward_conf_tag": 0.998046875, "step": 42 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/max_terminated_length": 373.0, "completions/mean_length": 103.85546875, "completions/mean_terminated_length": 103.85546875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.1376, "grad_norm": 0.3068692088127136, "learning_rate": 9.644128113879002e-07, "loss": 0.0016, "num_tokens": 30422817.0, "reward": 1.0537109375, "reward_std": 0.08325093984603882, "rewards/accuracy_reward_conf_tag": 0.5546875, "rewards/format_reward_conf_tag": 0.998046875, "step": 43 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/max_terminated_length": 267.0, "completions/mean_length": 104.0390625, "completions/mean_terminated_length": 104.0390625, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1408, "grad_norm": 0.4194898009300232, "learning_rate": 9.608540925266903e-07, "loss": -0.0007, "num_tokens": 31110461.0, "reward": 1.1123046875, "reward_std": 0.1408482789993286, "rewards/accuracy_reward_conf_tag": 0.61328125, "rewards/format_reward_conf_tag": 0.998046875, "step": 44 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 110.890625, "completions/mean_terminated_length": 110.890625, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.144, "grad_norm": 0.35951900482177734, "learning_rate": 9.572953736654805e-07, "loss": -0.0015, "num_tokens": 31810749.0, "reward": 0.966796875, "reward_std": 0.12099841982126236, "rewards/accuracy_reward_conf_tag": 0.466796875, "rewards/format_reward_conf_tag": 1.0, "step": 45 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/max_terminated_length": 322.0, "completions/mean_length": 107.833984375, "completions/mean_terminated_length": 107.833984375, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.1472, "grad_norm": 0.30960193276405334, "learning_rate": 9.537366548042705e-07, "loss": 0.0026, "num_tokens": 32494312.0, "reward": 1.046875, "reward_std": 0.10974523425102234, "rewards/accuracy_reward_conf_tag": 0.546875, "rewards/format_reward_conf_tag": 1.0, "step": 46 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 494.0, "completions/max_terminated_length": 494.0, "completions/mean_length": 112.703125, "completions/mean_terminated_length": 112.92367553710938, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1504, "grad_norm": 0.40593746304512024, "learning_rate": 9.501779359430605e-07, "loss": -0.0002, "num_tokens": 33188664.0, "reward": 1.0126953125, "reward_std": 0.1391420215368271, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 0.998046875, "step": 47 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.0, "completions/max_terminated_length": 348.0, "completions/mean_length": 106.701171875, "completions/mean_terminated_length": 106.701171875, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.1536, "grad_norm": 0.34867721796035767, "learning_rate": 9.466192170818504e-07, "loss": -0.0004, "num_tokens": 33907967.0, "reward": 1.02734375, "reward_std": 0.12585808336734772, "rewards/accuracy_reward_conf_tag": 0.52734375, "rewards/format_reward_conf_tag": 1.0, "step": 48 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 814.0, "completions/max_terminated_length": 814.0, "completions/mean_length": 111.453125, "completions/mean_terminated_length": 111.453125, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.1568, "grad_norm": 0.39540815353393555, "learning_rate": 9.430604982206405e-07, "loss": 0.0047, "num_tokens": 34588639.0, "reward": 0.9755859375, "reward_std": 0.12697330117225647, "rewards/accuracy_reward_conf_tag": 0.4765625, "rewards/format_reward_conf_tag": 0.998046875, "step": 49 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 108.625, "completions/mean_terminated_length": 108.625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.16, "grad_norm": 0.4082591235637665, "learning_rate": 9.395017793594306e-07, "loss": 0.0064, "num_tokens": 35313863.0, "reward": 0.962890625, "reward_std": 0.13952839374542236, "rewards/accuracy_reward_conf_tag": 0.462890625, "rewards/format_reward_conf_tag": 1.0, "step": 50 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/max_terminated_length": 372.0, "completions/mean_length": 101.4765625, "completions/mean_terminated_length": 101.4765625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.1632, "grad_norm": 0.32683131098747253, "learning_rate": 9.359430604982206e-07, "loss": -0.0007, "num_tokens": 36018587.0, "reward": 0.943359375, "reward_std": 0.08364099264144897, "rewards/accuracy_reward_conf_tag": 0.443359375, "rewards/format_reward_conf_tag": 1.0, "step": 51 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 760.0, "completions/max_terminated_length": 760.0, "completions/mean_length": 102.732421875, "completions/mean_terminated_length": 102.732421875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.1664, "grad_norm": 0.3419206738471985, "learning_rate": 9.323843416370106e-07, "loss": -0.0001, "num_tokens": 36726514.0, "reward": 1.025390625, "reward_std": 0.1361870914697647, "rewards/accuracy_reward_conf_tag": 0.525390625, "rewards/format_reward_conf_tag": 1.0, "step": 52 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 388.0, "completions/max_terminated_length": 388.0, "completions/mean_length": 104.595703125, "completions/mean_terminated_length": 104.8003921508789, "completions/min_length": 0.0, "completions/min_terminated_length": 38.0, "epoch": 0.1696, "grad_norm": 0.35515135526657104, "learning_rate": 9.288256227758006e-07, "loss": 0.001, "num_tokens": 37450499.0, "reward": 1.0068359375, "reward_std": 0.11343596875667572, "rewards/accuracy_reward_conf_tag": 0.5078125, "rewards/format_reward_conf_tag": 0.998046875, "step": 53 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 112.248046875, "completions/mean_terminated_length": 112.248046875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.1728, "grad_norm": 0.3525088131427765, "learning_rate": 9.252669039145908e-07, "loss": 0.0058, "num_tokens": 38165586.0, "reward": 0.947265625, "reward_std": 0.13717183470726013, "rewards/accuracy_reward_conf_tag": 0.447265625, "rewards/format_reward_conf_tag": 1.0, "step": 54 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.0, "completions/max_terminated_length": 385.0, "completions/mean_length": 105.18359375, "completions/mean_terminated_length": 105.18359375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.176, "grad_norm": 0.285494863986969, "learning_rate": 9.217081850533808e-07, "loss": -0.0009, "num_tokens": 38860288.0, "reward": 0.98046875, "reward_std": 0.10272009670734406, "rewards/accuracy_reward_conf_tag": 0.48046875, "rewards/format_reward_conf_tag": 1.0, "step": 55 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/max_terminated_length": 401.0, "completions/mean_length": 108.349609375, "completions/mean_terminated_length": 108.349609375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.1792, "grad_norm": 0.3341920077800751, "learning_rate": 9.181494661921708e-07, "loss": 0.0054, "num_tokens": 39569003.0, "reward": 0.931640625, "reward_std": 0.1034957766532898, "rewards/accuracy_reward_conf_tag": 0.431640625, "rewards/format_reward_conf_tag": 1.0, "step": 56 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.0, "completions/max_terminated_length": 319.0, "completions/mean_length": 98.427734375, "completions/mean_terminated_length": 98.427734375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.1824, "grad_norm": 0.41299957036972046, "learning_rate": 9.145907473309609e-07, "loss": 0.002, "num_tokens": 40239166.0, "reward": 1.072265625, "reward_std": 0.10074299573898315, "rewards/accuracy_reward_conf_tag": 0.572265625, "rewards/format_reward_conf_tag": 1.0, "step": 57 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 375.0, "completions/max_terminated_length": 375.0, "completions/mean_length": 101.177734375, "completions/mean_terminated_length": 101.177734375, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.1856, "grad_norm": 0.3476005494594574, "learning_rate": 9.110320284697508e-07, "loss": 0.0007, "num_tokens": 40938969.0, "reward": 1.107421875, "reward_std": 0.10843471437692642, "rewards/accuracy_reward_conf_tag": 0.607421875, "rewards/format_reward_conf_tag": 1.0, "step": 58 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 375.0, "completions/max_terminated_length": 375.0, "completions/mean_length": 101.630859375, "completions/mean_terminated_length": 101.630859375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.1888, "grad_norm": 0.39399948716163635, "learning_rate": 9.074733096085408e-07, "loss": -0.0007, "num_tokens": 41616580.0, "reward": 1.017578125, "reward_std": 0.15452474355697632, "rewards/accuracy_reward_conf_tag": 0.517578125, "rewards/format_reward_conf_tag": 1.0, "step": 59 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.0, "completions/max_terminated_length": 301.0, "completions/mean_length": 100.8046875, "completions/mean_terminated_length": 100.8046875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.192, "grad_norm": 0.43161484599113464, "learning_rate": 9.03914590747331e-07, "loss": -0.004, "num_tokens": 42293008.0, "reward": 0.951171875, "reward_std": 0.12973544001579285, "rewards/accuracy_reward_conf_tag": 0.451171875, "rewards/format_reward_conf_tag": 1.0, "step": 60 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/max_terminated_length": 317.0, "completions/mean_length": 111.44140625, "completions/mean_terminated_length": 111.44140625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.1952, "grad_norm": 0.36474984884262085, "learning_rate": 9.00355871886121e-07, "loss": 0.003, "num_tokens": 42975466.0, "reward": 1.0517578125, "reward_std": 0.13342617452144623, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 0.998046875, "step": 61 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.0, "completions/max_terminated_length": 345.0, "completions/mean_length": 99.87109375, "completions/mean_terminated_length": 99.87109375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.1984, "grad_norm": 0.36876848340034485, "learning_rate": 8.96797153024911e-07, "loss": -0.0061, "num_tokens": 43673400.0, "reward": 1.0546875, "reward_std": 0.12619948387145996, "rewards/accuracy_reward_conf_tag": 0.556640625, "rewards/format_reward_conf_tag": 0.99609375, "step": 62 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/max_terminated_length": 543.0, "completions/mean_length": 110.521484375, "completions/mean_terminated_length": 110.521484375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.2016, "grad_norm": 0.38113224506378174, "learning_rate": 8.93238434163701e-07, "loss": 0.0041, "num_tokens": 44388315.0, "reward": 1.1005859375, "reward_std": 0.11744188517332077, "rewards/accuracy_reward_conf_tag": 0.6015625, "rewards/format_reward_conf_tag": 0.998046875, "step": 63 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.0, "completions/max_terminated_length": 292.0, "completions/mean_length": 103.083984375, "completions/mean_terminated_length": 103.083984375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.2048, "grad_norm": 0.3483692705631256, "learning_rate": 8.896797153024911e-07, "loss": 0.0019, "num_tokens": 45101342.0, "reward": 1.0, "reward_std": 0.11651946604251862, "rewards/accuracy_reward_conf_tag": 0.5, "rewards/format_reward_conf_tag": 1.0, "step": 64 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 687.0, "completions/max_terminated_length": 687.0, "completions/mean_length": 107.66796875, "completions/mean_terminated_length": 107.66796875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.208, "grad_norm": 0.26865509152412415, "learning_rate": 8.861209964412811e-07, "loss": 0.0003, "num_tokens": 45791420.0, "reward": 1.041015625, "reward_std": 0.07397978007793427, "rewards/accuracy_reward_conf_tag": 0.541015625, "rewards/format_reward_conf_tag": 1.0, "step": 65 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 109.1171875, "completions/mean_terminated_length": 109.1171875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.2112, "grad_norm": 0.29685088992118835, "learning_rate": 8.825622775800712e-07, "loss": -0.0006, "num_tokens": 46502144.0, "reward": 0.96875, "reward_std": 0.08534969389438629, "rewards/accuracy_reward_conf_tag": 0.46875, "rewards/format_reward_conf_tag": 1.0, "step": 66 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/max_terminated_length": 387.0, "completions/mean_length": 103.14453125, "completions/mean_terminated_length": 103.14453125, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.2144, "grad_norm": 0.39966320991516113, "learning_rate": 8.790035587188612e-07, "loss": 0.0024, "num_tokens": 47189898.0, "reward": 0.919921875, "reward_std": 0.12855593860149384, "rewards/accuracy_reward_conf_tag": 0.419921875, "rewards/format_reward_conf_tag": 1.0, "step": 67 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 563.0, "completions/max_terminated_length": 563.0, "completions/mean_length": 118.955078125, "completions/mean_terminated_length": 119.1878662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.2176, "grad_norm": 0.290499746799469, "learning_rate": 8.754448398576512e-07, "loss": -0.004, "num_tokens": 47905347.0, "reward": 1.0126953125, "reward_std": 0.08002512156963348, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 0.998046875, "step": 68 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.0, "completions/max_terminated_length": 290.0, "completions/mean_length": 104.892578125, "completions/mean_terminated_length": 104.892578125, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.2208, "grad_norm": 0.3273511230945587, "learning_rate": 8.718861209964412e-07, "loss": 0.0007, "num_tokens": 48617268.0, "reward": 0.98046875, "reward_std": 0.11211731284856796, "rewards/accuracy_reward_conf_tag": 0.48046875, "rewards/format_reward_conf_tag": 1.0, "step": 69 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 113.2890625, "completions/mean_terminated_length": 113.2890625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.224, "grad_norm": 0.4415186643600464, "learning_rate": 8.683274021352312e-07, "loss": -0.0019, "num_tokens": 49328640.0, "reward": 0.9208984375, "reward_std": 0.09035773575305939, "rewards/accuracy_reward_conf_tag": 0.421875, "rewards/format_reward_conf_tag": 0.998046875, "step": 70 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.0, "completions/max_terminated_length": 340.0, "completions/mean_length": 115.138671875, "completions/mean_terminated_length": 115.138671875, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.2272, "grad_norm": 0.29847803711891174, "learning_rate": 8.647686832740213e-07, "loss": 0.0031, "num_tokens": 50057927.0, "reward": 1.080078125, "reward_std": 0.12132295966148376, "rewards/accuracy_reward_conf_tag": 0.580078125, "rewards/format_reward_conf_tag": 1.0, "step": 71 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 114.884765625, "completions/mean_terminated_length": 114.884765625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.2304, "grad_norm": 0.2995069622993469, "learning_rate": 8.612099644128114e-07, "loss": 0.0008, "num_tokens": 50776676.0, "reward": 1.072265625, "reward_std": 0.11225028336048126, "rewards/accuracy_reward_conf_tag": 0.572265625, "rewards/format_reward_conf_tag": 1.0, "step": 72 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/max_terminated_length": 293.0, "completions/mean_length": 107.51953125, "completions/mean_terminated_length": 107.51953125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.2336, "grad_norm": 0.3123959004878998, "learning_rate": 8.576512455516014e-07, "loss": -0.0031, "num_tokens": 51501750.0, "reward": 1.1025390625, "reward_std": 0.10646820068359375, "rewards/accuracy_reward_conf_tag": 0.603515625, "rewards/format_reward_conf_tag": 0.998046875, "step": 73 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/max_terminated_length": 391.0, "completions/mean_length": 109.5078125, "completions/mean_terminated_length": 109.5078125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.2368, "grad_norm": 0.36578795313835144, "learning_rate": 8.540925266903915e-07, "loss": -0.0032, "num_tokens": 52199714.0, "reward": 1.1796875, "reward_std": 0.11783071607351303, "rewards/accuracy_reward_conf_tag": 0.6796875, "rewards/format_reward_conf_tag": 1.0, "step": 74 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.0, "completions/max_terminated_length": 309.0, "completions/mean_length": 112.310546875, "completions/mean_terminated_length": 112.310546875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.24, "grad_norm": 0.3437058925628662, "learning_rate": 8.505338078291815e-07, "loss": 0.0002, "num_tokens": 52895609.0, "reward": 0.953125, "reward_std": 0.1145455539226532, "rewards/accuracy_reward_conf_tag": 0.453125, "rewards/format_reward_conf_tag": 1.0, "step": 75 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/max_terminated_length": 459.0, "completions/mean_length": 112.302734375, "completions/mean_terminated_length": 112.302734375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.2432, "grad_norm": 0.3406142592430115, "learning_rate": 8.469750889679715e-07, "loss": -0.0012, "num_tokens": 53584540.0, "reward": 0.96484375, "reward_std": 0.11579868942499161, "rewards/accuracy_reward_conf_tag": 0.46484375, "rewards/format_reward_conf_tag": 1.0, "step": 76 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/max_terminated_length": 362.0, "completions/mean_length": 110.951171875, "completions/mean_terminated_length": 110.951171875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.2464, "grad_norm": 0.35264548659324646, "learning_rate": 8.434163701067614e-07, "loss": 0.002, "num_tokens": 54289955.0, "reward": 1.07421875, "reward_std": 0.1356578916311264, "rewards/accuracy_reward_conf_tag": 0.57421875, "rewards/format_reward_conf_tag": 1.0, "step": 77 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/max_terminated_length": 427.0, "completions/mean_length": 113.669921875, "completions/mean_terminated_length": 113.669921875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.2496, "grad_norm": 0.3438767194747925, "learning_rate": 8.398576512455516e-07, "loss": 0.0021, "num_tokens": 55001058.0, "reward": 1.052734375, "reward_std": 0.12118876725435257, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 1.0, "step": 78 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/max_terminated_length": 325.0, "completions/mean_length": 116.921875, "completions/mean_terminated_length": 116.921875, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.2528, "grad_norm": 0.33218762278556824, "learning_rate": 8.362989323843416e-07, "loss": 0.0009, "num_tokens": 55718834.0, "reward": 1.02734375, "reward_std": 0.08588011562824249, "rewards/accuracy_reward_conf_tag": 0.52734375, "rewards/format_reward_conf_tag": 1.0, "step": 79 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/max_terminated_length": 498.0, "completions/mean_length": 114.783203125, "completions/mean_terminated_length": 114.783203125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.256, "grad_norm": 0.24106380343437195, "learning_rate": 8.327402135231316e-07, "loss": -0.003, "num_tokens": 56418067.0, "reward": 0.990234375, "reward_std": 0.09127214550971985, "rewards/accuracy_reward_conf_tag": 0.490234375, "rewards/format_reward_conf_tag": 1.0, "step": 80 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 131.59765625, "completions/mean_terminated_length": 131.59765625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.2592, "grad_norm": 0.34052160382270813, "learning_rate": 8.291814946619217e-07, "loss": -0.0007, "num_tokens": 57118517.0, "reward": 0.9453125, "reward_std": 0.12986400723457336, "rewards/accuracy_reward_conf_tag": 0.4453125, "rewards/format_reward_conf_tag": 1.0, "step": 81 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.0, "completions/max_terminated_length": 336.0, "completions/mean_length": 114.978515625, "completions/mean_terminated_length": 114.978515625, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.2624, "grad_norm": 0.32460305094718933, "learning_rate": 8.256227758007117e-07, "loss": -0.0002, "num_tokens": 57827042.0, "reward": 1.15234375, "reward_std": 0.12519346177577972, "rewards/accuracy_reward_conf_tag": 0.65234375, "rewards/format_reward_conf_tag": 1.0, "step": 82 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/max_terminated_length": 334.0, "completions/mean_length": 116.74609375, "completions/mean_terminated_length": 116.74609375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.2656, "grad_norm": 0.3671474754810333, "learning_rate": 8.220640569395017e-07, "loss": -0.0018, "num_tokens": 58522448.0, "reward": 1.07421875, "reward_std": 0.10915425419807434, "rewards/accuracy_reward_conf_tag": 0.57421875, "rewards/format_reward_conf_tag": 1.0, "step": 83 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 124.150390625, "completions/mean_terminated_length": 124.150390625, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.2688, "grad_norm": 0.261443555355072, "learning_rate": 8.185053380782919e-07, "loss": -0.0004, "num_tokens": 59245277.0, "reward": 0.923828125, "reward_std": 0.09087396413087845, "rewards/accuracy_reward_conf_tag": 0.423828125, "rewards/format_reward_conf_tag": 1.0, "step": 84 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/max_terminated_length": 424.0, "completions/mean_length": 129.166015625, "completions/mean_terminated_length": 129.166015625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.272, "grad_norm": 0.23352903127670288, "learning_rate": 8.149466192170819e-07, "loss": -0.0007, "num_tokens": 59942858.0, "reward": 0.951171875, "reward_std": 0.08916649222373962, "rewards/accuracy_reward_conf_tag": 0.451171875, "rewards/format_reward_conf_tag": 1.0, "step": 85 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 124.97265625, "completions/mean_terminated_length": 124.97265625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.2752, "grad_norm": 0.4312765300273895, "learning_rate": 8.113879003558719e-07, "loss": 0.0022, "num_tokens": 60664276.0, "reward": 1.00390625, "reward_std": 0.12960247695446014, "rewards/accuracy_reward_conf_tag": 0.50390625, "rewards/format_reward_conf_tag": 1.0, "step": 86 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/max_terminated_length": 480.0, "completions/mean_length": 124.25390625, "completions/mean_terminated_length": 124.25390625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.2784, "grad_norm": 0.23223963379859924, "learning_rate": 8.078291814946618e-07, "loss": -0.0012, "num_tokens": 61382414.0, "reward": 0.923828125, "reward_std": 0.07910466194152832, "rewards/accuracy_reward_conf_tag": 0.423828125, "rewards/format_reward_conf_tag": 1.0, "step": 87 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 120.853515625, "completions/mean_terminated_length": 120.853515625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.2816, "grad_norm": 0.38976651430130005, "learning_rate": 8.042704626334519e-07, "loss": -0.003, "num_tokens": 62066867.0, "reward": 1.0517578125, "reward_std": 0.14316235482692719, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 0.998046875, "step": 88 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/max_terminated_length": 435.0, "completions/mean_length": 123.83203125, "completions/mean_terminated_length": 123.83203125, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.2848, "grad_norm": 0.30516958236694336, "learning_rate": 8.007117437722419e-07, "loss": 0.0042, "num_tokens": 62771789.0, "reward": 0.9892578125, "reward_std": 0.1184254065155983, "rewards/accuracy_reward_conf_tag": 0.490234375, "rewards/format_reward_conf_tag": 0.998046875, "step": 89 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 613.0, "completions/max_terminated_length": 613.0, "completions/mean_length": 126.091796875, "completions/mean_terminated_length": 126.091796875, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.288, "grad_norm": 0.35033246874809265, "learning_rate": 7.97153024911032e-07, "loss": 0.0012, "num_tokens": 63467484.0, "reward": 1.052734375, "reward_std": 0.13197088241577148, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 1.0, "step": 90 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/max_terminated_length": 377.0, "completions/mean_length": 123.984375, "completions/mean_terminated_length": 123.984375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.2912, "grad_norm": 0.27110937237739563, "learning_rate": 7.935943060498221e-07, "loss": -0.0001, "num_tokens": 64168980.0, "reward": 1.0458984375, "reward_std": 0.1114620566368103, "rewards/accuracy_reward_conf_tag": 0.546875, "rewards/format_reward_conf_tag": 0.998046875, "step": 91 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 711.0, "completions/max_terminated_length": 711.0, "completions/mean_length": 128.50390625, "completions/mean_terminated_length": 128.50390625, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.2944, "grad_norm": 0.2403380125761032, "learning_rate": 7.900355871886121e-07, "loss": 0.0039, "num_tokens": 64868038.0, "reward": 1.015625, "reward_std": 0.0864686369895935, "rewards/accuracy_reward_conf_tag": 0.515625, "rewards/format_reward_conf_tag": 1.0, "step": 92 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/max_terminated_length": 317.0, "completions/mean_length": 123.880859375, "completions/mean_terminated_length": 123.880859375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.2976, "grad_norm": 0.29576191306114197, "learning_rate": 7.864768683274021e-07, "loss": 0.0007, "num_tokens": 65568777.0, "reward": 1.048828125, "reward_std": 0.10218648612499237, "rewards/accuracy_reward_conf_tag": 0.548828125, "rewards/format_reward_conf_tag": 1.0, "step": 93 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 122.111328125, "completions/mean_terminated_length": 122.111328125, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.3008, "grad_norm": 0.39196375012397766, "learning_rate": 7.829181494661921e-07, "loss": 0.0013, "num_tokens": 66287642.0, "reward": 1.03125, "reward_std": 0.12815654277801514, "rewards/accuracy_reward_conf_tag": 0.53125, "rewards/format_reward_conf_tag": 1.0, "step": 94 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 119.0078125, "completions/mean_terminated_length": 119.0078125, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.304, "grad_norm": 0.30568569898605347, "learning_rate": 7.793594306049822e-07, "loss": -0.0015, "num_tokens": 66983814.0, "reward": 0.99609375, "reward_std": 0.09231622517108917, "rewards/accuracy_reward_conf_tag": 0.49609375, "rewards/format_reward_conf_tag": 1.0, "step": 95 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 122.529296875, "completions/mean_terminated_length": 122.529296875, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.3072, "grad_norm": 0.25298038125038147, "learning_rate": 7.758007117437722e-07, "loss": 0.0025, "num_tokens": 67703637.0, "reward": 0.876953125, "reward_std": 0.07963507622480392, "rewards/accuracy_reward_conf_tag": 0.376953125, "rewards/format_reward_conf_tag": 1.0, "step": 96 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/max_terminated_length": 321.0, "completions/mean_length": 125.6484375, "completions/mean_terminated_length": 125.6484375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.3104, "grad_norm": 0.3075512647628784, "learning_rate": 7.722419928825622e-07, "loss": 0.0018, "num_tokens": 68426505.0, "reward": 1.044921875, "reward_std": 0.12158694118261337, "rewards/accuracy_reward_conf_tag": 0.544921875, "rewards/format_reward_conf_tag": 1.0, "step": 97 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.0, "completions/max_terminated_length": 327.0, "completions/mean_length": 119.001953125, "completions/mean_terminated_length": 119.001953125, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.3136, "grad_norm": 0.27927401661872864, "learning_rate": 7.686832740213523e-07, "loss": 0.0008, "num_tokens": 69136106.0, "reward": 1.13671875, "reward_std": 0.1054728776216507, "rewards/accuracy_reward_conf_tag": 0.63671875, "rewards/format_reward_conf_tag": 1.0, "step": 98 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 643.0, "completions/max_terminated_length": 643.0, "completions/mean_length": 124.619140625, "completions/mean_terminated_length": 124.619140625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.3168, "grad_norm": 0.32262176275253296, "learning_rate": 7.651245551601423e-07, "loss": 0.0011, "num_tokens": 69851343.0, "reward": 0.9853515625, "reward_std": 0.13401469588279724, "rewards/accuracy_reward_conf_tag": 0.486328125, "rewards/format_reward_conf_tag": 0.998046875, "step": 99 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.0, "completions/max_terminated_length": 415.0, "completions/mean_length": 121.791015625, "completions/mean_terminated_length": 121.791015625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.32, "grad_norm": 0.23345372080802917, "learning_rate": 7.615658362989323e-07, "loss": 0.0062, "num_tokens": 70537860.0, "reward": 1.0, "reward_std": 0.08404040336608887, "rewards/accuracy_reward_conf_tag": 0.5, "rewards/format_reward_conf_tag": 1.0, "step": 100 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/max_terminated_length": 470.0, "completions/mean_length": 123.078125, "completions/mean_terminated_length": 123.078125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.3232, "grad_norm": 0.3626199960708618, "learning_rate": 7.580071174377223e-07, "loss": 0.0054, "num_tokens": 71219380.0, "reward": 0.8740234375, "reward_std": 0.13244062662124634, "rewards/accuracy_reward_conf_tag": 0.375, "rewards/format_reward_conf_tag": 0.998046875, "step": 101 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/max_terminated_length": 440.0, "completions/mean_length": 125.478515625, "completions/mean_terminated_length": 125.478515625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.3264, "grad_norm": 0.32900869846343994, "learning_rate": 7.544483985765125e-07, "loss": -0.0028, "num_tokens": 71921705.0, "reward": 1.0595703125, "reward_std": 0.14519304037094116, "rewards/accuracy_reward_conf_tag": 0.560546875, "rewards/format_reward_conf_tag": 0.998046875, "step": 102 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 739.0, "completions/max_terminated_length": 739.0, "completions/mean_length": 129.78515625, "completions/mean_terminated_length": 129.78515625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.3296, "grad_norm": 0.27870070934295654, "learning_rate": 7.508896797153025e-07, "loss": -0.0008, "num_tokens": 72607955.0, "reward": 1.0859375, "reward_std": 0.10784495621919632, "rewards/accuracy_reward_conf_tag": 0.5859375, "rewards/format_reward_conf_tag": 1.0, "step": 103 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 342.0, "completions/max_terminated_length": 342.0, "completions/mean_length": 119.208984375, "completions/mean_terminated_length": 119.44226837158203, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.3328, "grad_norm": 0.3086647093296051, "learning_rate": 7.473309608540925e-07, "loss": -0.0011, "num_tokens": 73312158.0, "reward": 1.0751953125, "reward_std": 0.11778206378221512, "rewards/accuracy_reward_conf_tag": 0.576171875, "rewards/format_reward_conf_tag": 0.998046875, "step": 104 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 383.0, "completions/max_terminated_length": 383.0, "completions/mean_length": 126.595703125, "completions/mean_terminated_length": 126.595703125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.336, "grad_norm": 0.24830111861228943, "learning_rate": 7.437722419928826e-07, "loss": -0.0003, "num_tokens": 74005687.0, "reward": 1.0390625, "reward_std": 0.11961427330970764, "rewards/accuracy_reward_conf_tag": 0.5390625, "rewards/format_reward_conf_tag": 1.0, "step": 105 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 128.09375, "completions/mean_terminated_length": 128.09375, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.3392, "grad_norm": 0.36403143405914307, "learning_rate": 7.402135231316725e-07, "loss": -0.0005, "num_tokens": 74718207.0, "reward": 0.970703125, "reward_std": 0.10586786270141602, "rewards/accuracy_reward_conf_tag": 0.470703125, "rewards/format_reward_conf_tag": 1.0, "step": 106 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/max_terminated_length": 462.0, "completions/mean_length": 127.37890625, "completions/mean_terminated_length": 127.37890625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.3424, "grad_norm": 0.3605183959007263, "learning_rate": 7.366548042704625e-07, "loss": 0.0076, "num_tokens": 75440481.0, "reward": 0.982421875, "reward_std": 0.14683744311332703, "rewards/accuracy_reward_conf_tag": 0.482421875, "rewards/format_reward_conf_tag": 1.0, "step": 107 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/max_terminated_length": 483.0, "completions/mean_length": 119.15625, "completions/mean_terminated_length": 119.15625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.3456, "grad_norm": 0.36166146397590637, "learning_rate": 7.330960854092527e-07, "loss": -0.0064, "num_tokens": 76160809.0, "reward": 0.9990234375, "reward_std": 0.1018570065498352, "rewards/accuracy_reward_conf_tag": 0.5, "rewards/format_reward_conf_tag": 0.998046875, "step": 108 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/max_terminated_length": 417.0, "completions/mean_length": 128.896484375, "completions/mean_terminated_length": 128.896484375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.3488, "grad_norm": 0.1903688907623291, "learning_rate": 7.295373665480427e-07, "loss": 0.0003, "num_tokens": 76886116.0, "reward": 0.96484375, "reward_std": 0.05852591618895531, "rewards/accuracy_reward_conf_tag": 0.46484375, "rewards/format_reward_conf_tag": 1.0, "step": 109 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/max_terminated_length": 525.0, "completions/mean_length": 127.50390625, "completions/mean_terminated_length": 127.50390625, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.352, "grad_norm": 0.2527593970298767, "learning_rate": 7.259786476868327e-07, "loss": 0.0003, "num_tokens": 77611566.0, "reward": 0.998046875, "reward_std": 0.0958084836602211, "rewards/accuracy_reward_conf_tag": 0.498046875, "rewards/format_reward_conf_tag": 1.0, "step": 110 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 122.361328125, "completions/mean_terminated_length": 122.361328125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.3552, "grad_norm": 0.3307143449783325, "learning_rate": 7.224199288256227e-07, "loss": 0.0033, "num_tokens": 78295487.0, "reward": 1.076171875, "reward_std": 0.09804637730121613, "rewards/accuracy_reward_conf_tag": 0.576171875, "rewards/format_reward_conf_tag": 1.0, "step": 111 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/max_terminated_length": 386.0, "completions/mean_length": 120.083984375, "completions/mean_terminated_length": 120.083984375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.3584, "grad_norm": 0.27053558826446533, "learning_rate": 7.188612099644128e-07, "loss": 0.0036, "num_tokens": 79002450.0, "reward": 1.0, "reward_std": 0.08529354631900787, "rewards/accuracy_reward_conf_tag": 0.5, "rewards/format_reward_conf_tag": 1.0, "step": 112 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.0, "completions/max_terminated_length": 288.0, "completions/mean_length": 113.77734375, "completions/mean_terminated_length": 113.77734375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.3616, "grad_norm": 0.3072631061077118, "learning_rate": 7.153024911032028e-07, "loss": 0.0008, "num_tokens": 79721328.0, "reward": 1.068359375, "reward_std": 0.10514955222606659, "rewards/accuracy_reward_conf_tag": 0.568359375, "rewards/format_reward_conf_tag": 1.0, "step": 113 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 131.81640625, "completions/mean_terminated_length": 131.81640625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.3648, "grad_norm": 0.2604560852050781, "learning_rate": 7.117437722419929e-07, "loss": 0.0, "num_tokens": 80445730.0, "reward": 0.91796875, "reward_std": 0.08462892472743988, "rewards/accuracy_reward_conf_tag": 0.41796875, "rewards/format_reward_conf_tag": 1.0, "step": 114 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 556.0, "completions/max_terminated_length": 556.0, "completions/mean_length": 120.966796875, "completions/mean_terminated_length": 120.966796875, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.368, "grad_norm": 0.2620708644390106, "learning_rate": 7.08185053380783e-07, "loss": -0.0027, "num_tokens": 81157393.0, "reward": 1.029296875, "reward_std": 0.08475994318723679, "rewards/accuracy_reward_conf_tag": 0.529296875, "rewards/format_reward_conf_tag": 1.0, "step": 115 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/max_terminated_length": 351.0, "completions/mean_length": 116.03125, "completions/mean_terminated_length": 116.03125, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.3712, "grad_norm": 0.3374411165714264, "learning_rate": 7.046263345195729e-07, "loss": 0.0031, "num_tokens": 81862697.0, "reward": 1.0546875, "reward_std": 0.11638721823692322, "rewards/accuracy_reward_conf_tag": 0.5546875, "rewards/format_reward_conf_tag": 1.0, "step": 116 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/max_terminated_length": 284.0, "completions/mean_length": 115.44140625, "completions/mean_terminated_length": 115.44140625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.3744, "grad_norm": 0.2924691438674927, "learning_rate": 7.010676156583629e-07, "loss": -0.0026, "num_tokens": 82538027.0, "reward": 0.978515625, "reward_std": 0.0666126236319542, "rewards/accuracy_reward_conf_tag": 0.478515625, "rewards/format_reward_conf_tag": 1.0, "step": 117 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 719.0, "completions/max_terminated_length": 719.0, "completions/mean_length": 118.435546875, "completions/mean_terminated_length": 118.435546875, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.3776, "grad_norm": 0.3892248868942261, "learning_rate": 6.975088967971529e-07, "loss": 0.0001, "num_tokens": 83243658.0, "reward": 1.001953125, "reward_std": 0.09284786880016327, "rewards/accuracy_reward_conf_tag": 0.501953125, "rewards/format_reward_conf_tag": 1.0, "step": 118 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 126.115234375, "completions/mean_terminated_length": 126.115234375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.3808, "grad_norm": 0.34575212001800537, "learning_rate": 6.93950177935943e-07, "loss": -0.0026, "num_tokens": 83942717.0, "reward": 0.974609375, "reward_std": 0.12388540059328079, "rewards/accuracy_reward_conf_tag": 0.474609375, "rewards/format_reward_conf_tag": 1.0, "step": 119 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/max_terminated_length": 371.0, "completions/mean_length": 120.318359375, "completions/mean_terminated_length": 120.318359375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.384, "grad_norm": 0.3228378891944885, "learning_rate": 6.903914590747331e-07, "loss": -0.0019, "num_tokens": 84644472.0, "reward": 1.0615234375, "reward_std": 0.11856082826852798, "rewards/accuracy_reward_conf_tag": 0.5625, "rewards/format_reward_conf_tag": 0.998046875, "step": 120 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 381.0, "completions/max_terminated_length": 381.0, "completions/mean_length": 115.396484375, "completions/mean_terminated_length": 115.396484375, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.3872, "grad_norm": 0.3080546259880066, "learning_rate": 6.868327402135231e-07, "loss": 0.0042, "num_tokens": 85346659.0, "reward": 1.0322265625, "reward_std": 0.09166219830513, "rewards/accuracy_reward_conf_tag": 0.533203125, "rewards/format_reward_conf_tag": 0.998046875, "step": 121 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 557.0, "completions/max_terminated_length": 557.0, "completions/mean_length": 123.498046875, "completions/mean_terminated_length": 123.498046875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.3904, "grad_norm": 0.2807541489601135, "learning_rate": 6.832740213523132e-07, "loss": 0.0019, "num_tokens": 86033194.0, "reward": 1.119140625, "reward_std": 0.11750739067792892, "rewards/accuracy_reward_conf_tag": 0.619140625, "rewards/format_reward_conf_tag": 1.0, "step": 122 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 381.0, "completions/max_terminated_length": 381.0, "completions/mean_length": 119.154296875, "completions/mean_terminated_length": 119.154296875, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.3936, "grad_norm": 0.2932218313217163, "learning_rate": 6.797153024911032e-07, "loss": -0.0024, "num_tokens": 86743521.0, "reward": 0.982421875, "reward_std": 0.12789133191108704, "rewards/accuracy_reward_conf_tag": 0.482421875, "rewards/format_reward_conf_tag": 1.0, "step": 123 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 111.0, "completions/mean_terminated_length": 111.0, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.3968, "grad_norm": 0.2224765568971634, "learning_rate": 6.761565836298932e-07, "loss": 0.0008, "num_tokens": 87461897.0, "reward": 0.962890625, "reward_std": 0.09001900255680084, "rewards/accuracy_reward_conf_tag": 0.462890625, "rewards/format_reward_conf_tag": 1.0, "step": 124 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 352.0, "completions/max_terminated_length": 352.0, "completions/mean_length": 118.009765625, "completions/mean_terminated_length": 118.009765625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.4, "grad_norm": 0.3143448233604431, "learning_rate": 6.725978647686833e-07, "loss": 0.0015, "num_tokens": 88171806.0, "reward": 1.1005859375, "reward_std": 0.0854901671409607, "rewards/accuracy_reward_conf_tag": 0.6015625, "rewards/format_reward_conf_tag": 0.998046875, "step": 125 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/max_terminated_length": 370.0, "completions/mean_length": 127.052734375, "completions/mean_terminated_length": 127.052734375, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.4032, "grad_norm": 0.2967285215854645, "learning_rate": 6.690391459074733e-07, "loss": -0.0034, "num_tokens": 88882945.0, "reward": 0.95703125, "reward_std": 0.12855716049671173, "rewards/accuracy_reward_conf_tag": 0.45703125, "rewards/format_reward_conf_tag": 1.0, "step": 126 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.0, "completions/max_terminated_length": 361.0, "completions/mean_length": 112.880859375, "completions/mean_terminated_length": 112.880859375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.4064, "grad_norm": 0.29222628474235535, "learning_rate": 6.654804270462633e-07, "loss": 0.0005, "num_tokens": 89552300.0, "reward": 1.01953125, "reward_std": 0.10645762085914612, "rewards/accuracy_reward_conf_tag": 0.51953125, "rewards/format_reward_conf_tag": 1.0, "step": 127 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 740.0, "completions/max_terminated_length": 740.0, "completions/mean_length": 110.9296875, "completions/mean_terminated_length": 110.9296875, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.4096, "grad_norm": 0.3010176420211792, "learning_rate": 6.619217081850533e-07, "loss": 0.003, "num_tokens": 90253248.0, "reward": 1.0439453125, "reward_std": 0.08535781502723694, "rewards/accuracy_reward_conf_tag": 0.544921875, "rewards/format_reward_conf_tag": 0.998046875, "step": 128 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 116.814453125, "completions/mean_terminated_length": 116.814453125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.4128, "grad_norm": 0.3324480950832367, "learning_rate": 6.583629893238434e-07, "loss": 0.0007, "num_tokens": 90963753.0, "reward": 0.994140625, "reward_std": 0.14064979553222656, "rewards/accuracy_reward_conf_tag": 0.494140625, "rewards/format_reward_conf_tag": 1.0, "step": 129 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 120.736328125, "completions/mean_terminated_length": 120.736328125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.416, "grad_norm": 0.3121265172958374, "learning_rate": 6.548042704626334e-07, "loss": 0.0041, "num_tokens": 91660994.0, "reward": 1.0234375, "reward_std": 0.11435520648956299, "rewards/accuracy_reward_conf_tag": 0.5234375, "rewards/format_reward_conf_tag": 1.0, "step": 130 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 115.466796875, "completions/mean_terminated_length": 115.466796875, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.4192, "grad_norm": 0.20846891403198242, "learning_rate": 6.512455516014234e-07, "loss": -0.0025, "num_tokens": 92371857.0, "reward": 1.083984375, "reward_std": 0.07213812321424484, "rewards/accuracy_reward_conf_tag": 0.583984375, "rewards/format_reward_conf_tag": 1.0, "step": 131 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/max_terminated_length": 483.0, "completions/mean_length": 122.80859375, "completions/mean_terminated_length": 122.80859375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.4224, "grad_norm": 0.3403347432613373, "learning_rate": 6.476868327402136e-07, "loss": 0.0003, "num_tokens": 93079431.0, "reward": 0.892578125, "reward_std": 0.12572510540485382, "rewards/accuracy_reward_conf_tag": 0.392578125, "rewards/format_reward_conf_tag": 1.0, "step": 132 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/max_terminated_length": 468.0, "completions/mean_length": 118.029296875, "completions/mean_terminated_length": 118.029296875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.4256, "grad_norm": 0.32223713397979736, "learning_rate": 6.441281138790036e-07, "loss": 0.0029, "num_tokens": 93788366.0, "reward": 1.150390625, "reward_std": 0.1244158148765564, "rewards/accuracy_reward_conf_tag": 0.650390625, "rewards/format_reward_conf_tag": 1.0, "step": 133 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.0, "completions/max_terminated_length": 333.0, "completions/mean_length": 114.587890625, "completions/mean_terminated_length": 114.587890625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.4288, "grad_norm": 0.2855245769023895, "learning_rate": 6.405693950177936e-07, "loss": -0.0027, "num_tokens": 94470443.0, "reward": 0.9609375, "reward_std": 0.08022482693195343, "rewards/accuracy_reward_conf_tag": 0.4609375, "rewards/format_reward_conf_tag": 1.0, "step": 134 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/max_terminated_length": 404.0, "completions/mean_length": 115.17578125, "completions/mean_terminated_length": 115.17578125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.432, "grad_norm": 0.28281596302986145, "learning_rate": 6.370106761565835e-07, "loss": 0.0042, "num_tokens": 95182293.0, "reward": 1.037109375, "reward_std": 0.11684277653694153, "rewards/accuracy_reward_conf_tag": 0.537109375, "rewards/format_reward_conf_tag": 1.0, "step": 135 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/max_terminated_length": 442.0, "completions/mean_length": 119.787109375, "completions/mean_terminated_length": 119.787109375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.4352, "grad_norm": 0.2585252523422241, "learning_rate": 6.334519572953736e-07, "loss": 0.0017, "num_tokens": 95877848.0, "reward": 0.9501953125, "reward_std": 0.0925920158624649, "rewards/accuracy_reward_conf_tag": 0.451171875, "rewards/format_reward_conf_tag": 0.998046875, "step": 136 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 117.841796875, "completions/mean_terminated_length": 117.841796875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.4384, "grad_norm": 0.3071497976779938, "learning_rate": 6.298932384341636e-07, "loss": 0.0005, "num_tokens": 96571727.0, "reward": 1.009765625, "reward_std": 0.09994859993457794, "rewards/accuracy_reward_conf_tag": 0.509765625, "rewards/format_reward_conf_tag": 1.0, "step": 137 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/max_terminated_length": 394.0, "completions/mean_length": 120.087890625, "completions/mean_terminated_length": 120.087890625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.4416, "grad_norm": 0.30834370851516724, "learning_rate": 6.263345195729537e-07, "loss": -0.0003, "num_tokens": 97261036.0, "reward": 1.076171875, "reward_std": 0.12316463887691498, "rewards/accuracy_reward_conf_tag": 0.576171875, "rewards/format_reward_conf_tag": 1.0, "step": 138 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/max_terminated_length": 566.0, "completions/mean_length": 120.49609375, "completions/mean_terminated_length": 120.49609375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.4448, "grad_norm": 0.3317796289920807, "learning_rate": 6.227758007117438e-07, "loss": 0.0068, "num_tokens": 97985682.0, "reward": 1.0029296875, "reward_std": 0.09929457306861877, "rewards/accuracy_reward_conf_tag": 0.50390625, "rewards/format_reward_conf_tag": 0.998046875, "step": 139 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.0, "completions/max_terminated_length": 338.0, "completions/mean_length": 115.91015625, "completions/mean_terminated_length": 115.91015625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.448, "grad_norm": 0.30667245388031006, "learning_rate": 6.192170818505338e-07, "loss": -0.0021, "num_tokens": 98692340.0, "reward": 1.064453125, "reward_std": 0.11494496464729309, "rewards/accuracy_reward_conf_tag": 0.564453125, "rewards/format_reward_conf_tag": 1.0, "step": 140 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 120.390625, "completions/mean_terminated_length": 120.390625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.4512, "grad_norm": 0.2922412157058716, "learning_rate": 6.156583629893238e-07, "loss": 0.0067, "num_tokens": 99419076.0, "reward": 1.078125, "reward_std": 0.115932896733284, "rewards/accuracy_reward_conf_tag": 0.578125, "rewards/format_reward_conf_tag": 1.0, "step": 141 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1022.0, "completions/max_terminated_length": 1022.0, "completions/mean_length": 121.181640625, "completions/mean_terminated_length": 121.181640625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.4544, "grad_norm": 0.2767346501350403, "learning_rate": 6.120996441281139e-07, "loss": 0.0063, "num_tokens": 100136025.0, "reward": 0.951171875, "reward_std": 0.09436499327421188, "rewards/accuracy_reward_conf_tag": 0.451171875, "rewards/format_reward_conf_tag": 1.0, "step": 142 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 135.03515625, "completions/mean_terminated_length": 135.03515625, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.4576, "grad_norm": 0.19604112207889557, "learning_rate": 6.085409252669039e-07, "loss": 0.0, "num_tokens": 100862955.0, "reward": 0.9580078125, "reward_std": 0.06812161952257156, "rewards/accuracy_reward_conf_tag": 0.458984375, "rewards/format_reward_conf_tag": 0.998046875, "step": 143 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 589.0, "completions/max_terminated_length": 589.0, "completions/mean_length": 132.37890625, "completions/mean_terminated_length": 132.37890625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.4608, "grad_norm": 0.2899148464202881, "learning_rate": 6.04982206405694e-07, "loss": 0.004, "num_tokens": 101580701.0, "reward": 0.837890625, "reward_std": 0.10290726274251938, "rewards/accuracy_reward_conf_tag": 0.337890625, "rewards/format_reward_conf_tag": 1.0, "step": 144 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 790.0, "completions/max_terminated_length": 790.0, "completions/mean_length": 129.275390625, "completions/mean_terminated_length": 129.275390625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.464, "grad_norm": 0.19214464724063873, "learning_rate": 6.014234875444839e-07, "loss": 0.0004, "num_tokens": 102279466.0, "reward": 0.9375, "reward_std": 0.05596347898244858, "rewards/accuracy_reward_conf_tag": 0.4375, "rewards/format_reward_conf_tag": 1.0, "step": 145 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/max_terminated_length": 396.0, "completions/mean_length": 133.865234375, "completions/mean_terminated_length": 133.865234375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.4672, "grad_norm": 0.2788802683353424, "learning_rate": 5.97864768683274e-07, "loss": 0.0023, "num_tokens": 102992461.0, "reward": 1.013671875, "reward_std": 0.08035779744386673, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 1.0, "step": 146 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 513.0, "completions/max_terminated_length": 513.0, "completions/mean_length": 134.048828125, "completions/mean_terminated_length": 134.048828125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.4704, "grad_norm": 0.3049945533275604, "learning_rate": 5.94306049822064e-07, "loss": -0.0014, "num_tokens": 103722622.0, "reward": 1.005859375, "reward_std": 0.11737515777349472, "rewards/accuracy_reward_conf_tag": 0.505859375, "rewards/format_reward_conf_tag": 1.0, "step": 147 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/max_terminated_length": 427.0, "completions/mean_length": 131.533203125, "completions/mean_terminated_length": 131.533203125, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.4736, "grad_norm": 0.24999631941318512, "learning_rate": 5.90747330960854e-07, "loss": 0.0002, "num_tokens": 104435783.0, "reward": 0.958984375, "reward_std": 0.08745656907558441, "rewards/accuracy_reward_conf_tag": 0.458984375, "rewards/format_reward_conf_tag": 1.0, "step": 148 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 656.0, "completions/max_terminated_length": 656.0, "completions/mean_length": 134.099609375, "completions/mean_terminated_length": 134.099609375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.4768, "grad_norm": 0.2993578016757965, "learning_rate": 5.871886120996441e-07, "loss": 0.0044, "num_tokens": 105146954.0, "reward": 1.052734375, "reward_std": 0.11027441918849945, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 1.0, "step": 149 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 577.0, "completions/max_terminated_length": 577.0, "completions/mean_length": 141.923828125, "completions/mean_terminated_length": 141.923828125, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.48, "grad_norm": 0.29732728004455566, "learning_rate": 5.836298932384342e-07, "loss": -0.0001, "num_tokens": 105854939.0, "reward": 1.060546875, "reward_std": 0.1720854938030243, "rewards/accuracy_reward_conf_tag": 0.560546875, "rewards/format_reward_conf_tag": 1.0, "step": 150 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/max_terminated_length": 495.0, "completions/mean_length": 129.712890625, "completions/mean_terminated_length": 129.712890625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.4832, "grad_norm": 0.3430553674697876, "learning_rate": 5.800711743772242e-07, "loss": 0.001, "num_tokens": 106583016.0, "reward": 1.0087890625, "reward_std": 0.14236661791801453, "rewards/accuracy_reward_conf_tag": 0.509765625, "rewards/format_reward_conf_tag": 0.998046875, "step": 151 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 526.0, "completions/max_terminated_length": 526.0, "completions/mean_length": 127.662109375, "completions/mean_terminated_length": 127.662109375, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.4864, "grad_norm": 0.20804554224014282, "learning_rate": 5.765124555160142e-07, "loss": 0.0015, "num_tokens": 107295859.0, "reward": 1.0498046875, "reward_std": 0.0739220678806305, "rewards/accuracy_reward_conf_tag": 0.55078125, "rewards/format_reward_conf_tag": 0.998046875, "step": 152 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 653.0, "completions/max_terminated_length": 653.0, "completions/mean_length": 133.904296875, "completions/mean_terminated_length": 133.904296875, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.4896, "grad_norm": 0.4418400228023529, "learning_rate": 5.729537366548043e-07, "loss": -0.0003, "num_tokens": 108020442.0, "reward": 1.041015625, "reward_std": 0.12079255282878876, "rewards/accuracy_reward_conf_tag": 0.541015625, "rewards/format_reward_conf_tag": 1.0, "step": 153 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 120.732421875, "completions/mean_terminated_length": 120.732421875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.4928, "grad_norm": 0.26306527853012085, "learning_rate": 5.693950177935943e-07, "loss": -0.0045, "num_tokens": 108763561.0, "reward": 1.0341796875, "reward_std": 0.08357548713684082, "rewards/accuracy_reward_conf_tag": 0.53515625, "rewards/format_reward_conf_tag": 0.998046875, "step": 154 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 368.0, "completions/max_terminated_length": 368.0, "completions/mean_length": 122.984375, "completions/mean_terminated_length": 122.984375, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.496, "grad_norm": 0.32889747619628906, "learning_rate": 5.658362989323842e-07, "loss": 0.0017, "num_tokens": 109474705.0, "reward": 1.013671875, "reward_std": 0.12204372882843018, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 1.0, "step": 155 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1081.0, "completions/max_terminated_length": 1081.0, "completions/mean_length": 123.974609375, "completions/mean_terminated_length": 123.974609375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.4992, "grad_norm": 0.37933510541915894, "learning_rate": 5.622775800711744e-07, "loss": 0.0018, "num_tokens": 110193772.0, "reward": 1.0966796875, "reward_std": 0.10429303348064423, "rewards/accuracy_reward_conf_tag": 0.59765625, "rewards/format_reward_conf_tag": 0.998046875, "step": 156 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 570.0, "completions/max_terminated_length": 570.0, "completions/mean_length": 116.595703125, "completions/mean_terminated_length": 116.8238754272461, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.5024, "grad_norm": 0.2643898129463196, "learning_rate": 5.587188612099644e-07, "loss": 0.001, "num_tokens": 110892333.0, "reward": 1.0615234375, "reward_std": 0.12381990253925323, "rewards/accuracy_reward_conf_tag": 0.5625, "rewards/format_reward_conf_tag": 0.998046875, "step": 157 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 622.0, "completions/max_terminated_length": 622.0, "completions/mean_length": 123.662109375, "completions/mean_terminated_length": 123.662109375, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.5056, "grad_norm": 0.32417362928390503, "learning_rate": 5.551601423487544e-07, "loss": -0.0011, "num_tokens": 111618800.0, "reward": 1.009765625, "reward_std": 0.10362998396158218, "rewards/accuracy_reward_conf_tag": 0.509765625, "rewards/format_reward_conf_tag": 1.0, "step": 158 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/max_terminated_length": 482.0, "completions/mean_length": 124.189453125, "completions/mean_terminated_length": 124.189453125, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.5088, "grad_norm": 0.3062315881252289, "learning_rate": 5.516014234875445e-07, "loss": -0.0009, "num_tokens": 112338761.0, "reward": 1.046875, "reward_std": 0.08956344425678253, "rewards/accuracy_reward_conf_tag": 0.546875, "rewards/format_reward_conf_tag": 1.0, "step": 159 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/max_terminated_length": 266.0, "completions/mean_length": 112.80859375, "completions/mean_terminated_length": 112.80859375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.512, "grad_norm": 0.3602045178413391, "learning_rate": 5.480427046263345e-07, "loss": 0.0026, "num_tokens": 113036599.0, "reward": 1.0224609375, "reward_std": 0.13203760981559753, "rewards/accuracy_reward_conf_tag": 0.5234375, "rewards/format_reward_conf_tag": 0.998046875, "step": 160 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 964.0, "completions/max_terminated_length": 964.0, "completions/mean_length": 121.591796875, "completions/mean_terminated_length": 121.591796875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.5152, "grad_norm": 0.36861321330070496, "learning_rate": 5.444839857651245e-07, "loss": -0.0005, "num_tokens": 113745438.0, "reward": 1.033203125, "reward_std": 0.11691641062498093, "rewards/accuracy_reward_conf_tag": 0.533203125, "rewards/format_reward_conf_tag": 1.0, "step": 161 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.0, "completions/max_terminated_length": 345.0, "completions/mean_length": 113.275390625, "completions/mean_terminated_length": 113.275390625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.5184, "grad_norm": 0.21255213022232056, "learning_rate": 5.409252669039146e-07, "loss": 0.0005, "num_tokens": 114444123.0, "reward": 1.041015625, "reward_std": 0.05102773755788803, "rewards/accuracy_reward_conf_tag": 0.541015625, "rewards/format_reward_conf_tag": 1.0, "step": 162 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 823.0, "completions/max_terminated_length": 823.0, "completions/mean_length": 119.599609375, "completions/mean_terminated_length": 119.599609375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.5216, "grad_norm": 0.17109361290931702, "learning_rate": 5.373665480427047e-07, "loss": 0.0029, "num_tokens": 115140918.0, "reward": 0.923828125, "reward_std": 0.05444513261318207, "rewards/accuracy_reward_conf_tag": 0.423828125, "rewards/format_reward_conf_tag": 1.0, "step": 163 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.0, "completions/max_terminated_length": 426.0, "completions/mean_length": 114.478515625, "completions/mean_terminated_length": 114.478515625, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.5248, "grad_norm": 0.2568250298500061, "learning_rate": 5.338078291814946e-07, "loss": -0.0014, "num_tokens": 115846435.0, "reward": 1.126953125, "reward_std": 0.08982865512371063, "rewards/accuracy_reward_conf_tag": 0.626953125, "rewards/format_reward_conf_tag": 1.0, "step": 164 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 764.0, "completions/max_terminated_length": 764.0, "completions/mean_length": 118.228515625, "completions/mean_terminated_length": 118.228515625, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.528, "grad_norm": 0.29473572969436646, "learning_rate": 5.302491103202846e-07, "loss": -0.0012, "num_tokens": 116562272.0, "reward": 1.001953125, "reward_std": 0.07752697169780731, "rewards/accuracy_reward_conf_tag": 0.501953125, "rewards/format_reward_conf_tag": 1.0, "step": 165 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/max_terminated_length": 466.0, "completions/mean_length": 114.53125, "completions/mean_terminated_length": 114.53125, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.5312, "grad_norm": 0.3796415328979492, "learning_rate": 5.266903914590747e-07, "loss": -0.0038, "num_tokens": 117266864.0, "reward": 1.06640625, "reward_std": 0.15682196617126465, "rewards/accuracy_reward_conf_tag": 0.56640625, "rewards/format_reward_conf_tag": 1.0, "step": 166 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/max_terminated_length": 445.0, "completions/mean_length": 115.1171875, "completions/mean_terminated_length": 115.1171875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.5344, "grad_norm": 0.3149721324443817, "learning_rate": 5.231316725978647e-07, "loss": 0.0031, "num_tokens": 117977604.0, "reward": 1.119140625, "reward_std": 0.11040420830249786, "rewards/accuracy_reward_conf_tag": 0.619140625, "rewards/format_reward_conf_tag": 1.0, "step": 167 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/max_terminated_length": 409.0, "completions/mean_length": 118.15625, "completions/mean_terminated_length": 118.15625, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.5376, "grad_norm": 0.34061557054519653, "learning_rate": 5.195729537366548e-07, "loss": -0.0016, "num_tokens": 118671964.0, "reward": 1.048828125, "reward_std": 0.12014345824718475, "rewards/accuracy_reward_conf_tag": 0.548828125, "rewards/format_reward_conf_tag": 1.0, "step": 168 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 116.755859375, "completions/mean_terminated_length": 116.755859375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.5408, "grad_norm": 0.2851979434490204, "learning_rate": 5.160142348754448e-07, "loss": -0.0004, "num_tokens": 119399271.0, "reward": 1.0703125, "reward_std": 0.07483351975679398, "rewards/accuracy_reward_conf_tag": 0.5703125, "rewards/format_reward_conf_tag": 1.0, "step": 169 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 594.0, "completions/max_terminated_length": 594.0, "completions/mean_length": 114.17578125, "completions/mean_terminated_length": 114.39921569824219, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.544, "grad_norm": 0.5101816058158875, "learning_rate": 5.124555160142349e-07, "loss": -0.0, "num_tokens": 120117137.0, "reward": 1.0439453125, "reward_std": 0.07975868880748749, "rewards/accuracy_reward_conf_tag": 0.544921875, "rewards/format_reward_conf_tag": 0.998046875, "step": 170 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 118.01953125, "completions/mean_terminated_length": 118.01953125, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.5472, "grad_norm": 0.3264475464820862, "learning_rate": 5.088967971530249e-07, "loss": -0.0012, "num_tokens": 120812827.0, "reward": 1.0087890625, "reward_std": 0.1307452917098999, "rewards/accuracy_reward_conf_tag": 0.51171875, "rewards/format_reward_conf_tag": 0.994140625, "step": 171 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 288.0, "completions/max_terminated_length": 288.0, "completions/mean_length": 111.177734375, "completions/mean_terminated_length": 111.39530181884766, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.5504, "grad_norm": 0.2982167899608612, "learning_rate": 5.053380782918149e-07, "loss": 0.0012, "num_tokens": 121500278.0, "reward": 1.099609375, "reward_std": 0.10055398941040039, "rewards/accuracy_reward_conf_tag": 0.6015625, "rewards/format_reward_conf_tag": 0.99609375, "step": 172 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 606.0, "completions/max_terminated_length": 606.0, "completions/mean_length": 116.041015625, "completions/mean_terminated_length": 116.041015625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.5536, "grad_norm": 0.3056163787841797, "learning_rate": 5.01779359430605e-07, "loss": -0.0033, "num_tokens": 122194275.0, "reward": 1.0107421875, "reward_std": 0.110807403922081, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 0.994140625, "step": 173 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 422.0, "completions/max_terminated_length": 422.0, "completions/mean_length": 116.109375, "completions/mean_terminated_length": 116.33659362792969, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.5568, "grad_norm": 0.3178585469722748, "learning_rate": 4.98220640569395e-07, "loss": 0.0029, "num_tokens": 122884171.0, "reward": 0.9658203125, "reward_std": 0.12013532966375351, "rewards/accuracy_reward_conf_tag": 0.466796875, "rewards/format_reward_conf_tag": 0.998046875, "step": 174 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.0, "completions/max_terminated_length": 390.0, "completions/mean_length": 117.310546875, "completions/mean_terminated_length": 117.310546875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.56, "grad_norm": 0.30916622281074524, "learning_rate": 4.94661921708185e-07, "loss": 0.003, "num_tokens": 123586378.0, "reward": 0.994140625, "reward_std": 0.07555306702852249, "rewards/accuracy_reward_conf_tag": 0.494140625, "rewards/format_reward_conf_tag": 1.0, "step": 175 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 465.0, "completions/max_terminated_length": 465.0, "completions/mean_length": 124.25390625, "completions/mean_terminated_length": 124.74118041992188, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.5632, "grad_norm": 0.2795889377593994, "learning_rate": 4.91103202846975e-07, "loss": 0.0006, "num_tokens": 124316028.0, "reward": 0.91015625, "reward_std": 0.13375455141067505, "rewards/accuracy_reward_conf_tag": 0.412109375, "rewards/format_reward_conf_tag": 0.99609375, "step": 176 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.0, "completions/max_terminated_length": 390.0, "completions/mean_length": 112.29296875, "completions/mean_terminated_length": 112.29296875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.5664, "grad_norm": 0.24903085827827454, "learning_rate": 4.875444839857651e-07, "loss": 0.0034, "num_tokens": 125036914.0, "reward": 1.0546875, "reward_std": 0.09239231050014496, "rewards/accuracy_reward_conf_tag": 0.5546875, "rewards/format_reward_conf_tag": 1.0, "step": 177 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 528.0, "completions/max_terminated_length": 528.0, "completions/mean_length": 114.640625, "completions/mean_terminated_length": 114.640625, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.5696, "grad_norm": 0.28691524267196655, "learning_rate": 4.839857651245551e-07, "loss": 0.0008, "num_tokens": 125732938.0, "reward": 1.060546875, "reward_std": 0.1258593201637268, "rewards/accuracy_reward_conf_tag": 0.560546875, "rewards/format_reward_conf_tag": 1.0, "step": 178 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 125.01953125, "completions/mean_terminated_length": 125.01953125, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.5728, "grad_norm": 0.27484962344169617, "learning_rate": 4.804270462633451e-07, "loss": 0.0006, "num_tokens": 126441572.0, "reward": 1.029296875, "reward_std": 0.08219750225543976, "rewards/accuracy_reward_conf_tag": 0.529296875, "rewards/format_reward_conf_tag": 1.0, "step": 179 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/max_terminated_length": 334.0, "completions/mean_length": 114.263671875, "completions/mean_terminated_length": 114.263671875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.576, "grad_norm": 0.2465706318616867, "learning_rate": 4.768683274021353e-07, "loss": 0.0014, "num_tokens": 127153011.0, "reward": 0.95703125, "reward_std": 0.1047501489520073, "rewards/accuracy_reward_conf_tag": 0.45703125, "rewards/format_reward_conf_tag": 1.0, "step": 180 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/max_terminated_length": 468.0, "completions/mean_length": 123.73046875, "completions/mean_terminated_length": 123.73046875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.5792, "grad_norm": 0.23253266513347626, "learning_rate": 4.733096085409252e-07, "loss": 0.0024, "num_tokens": 127890969.0, "reward": 1.0341796875, "reward_std": 0.11296170204877853, "rewards/accuracy_reward_conf_tag": 0.53515625, "rewards/format_reward_conf_tag": 0.998046875, "step": 181 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 117.541015625, "completions/mean_terminated_length": 117.541015625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.5824, "grad_norm": 0.22888119518756866, "learning_rate": 4.697508896797153e-07, "loss": 0.0041, "num_tokens": 128599398.0, "reward": 0.9697265625, "reward_std": 0.07049369812011719, "rewards/accuracy_reward_conf_tag": 0.470703125, "rewards/format_reward_conf_tag": 0.998046875, "step": 182 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 120.60546875, "completions/mean_terminated_length": 120.60546875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.5856, "grad_norm": 0.2716979682445526, "learning_rate": 4.661921708185053e-07, "loss": -0.0001, "num_tokens": 129309604.0, "reward": 0.982421875, "reward_std": 0.10448494553565979, "rewards/accuracy_reward_conf_tag": 0.482421875, "rewards/format_reward_conf_tag": 1.0, "step": 183 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 526.0, "completions/max_terminated_length": 526.0, "completions/mean_length": 120.044921875, "completions/mean_terminated_length": 120.044921875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.5888, "grad_norm": 0.22985929250717163, "learning_rate": 4.626334519572954e-07, "loss": -0.0002, "num_tokens": 130010235.0, "reward": 1.046875, "reward_std": 0.07318221032619476, "rewards/accuracy_reward_conf_tag": 0.546875, "rewards/format_reward_conf_tag": 1.0, "step": 184 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 122.2890625, "completions/mean_terminated_length": 122.2890625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.592, "grad_norm": 0.3127327859401703, "learning_rate": 4.590747330960854e-07, "loss": -0.0014, "num_tokens": 130721879.0, "reward": 1.041015625, "reward_std": 0.13762861490249634, "rewards/accuracy_reward_conf_tag": 0.541015625, "rewards/format_reward_conf_tag": 1.0, "step": 185 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 410.0, "completions/max_terminated_length": 410.0, "completions/mean_length": 115.82421875, "completions/mean_terminated_length": 115.82421875, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.5952, "grad_norm": 0.2917990982532501, "learning_rate": 4.555160142348754e-07, "loss": 0.0026, "num_tokens": 131429917.0, "reward": 1.044921875, "reward_std": 0.10915547609329224, "rewards/accuracy_reward_conf_tag": 0.544921875, "rewards/format_reward_conf_tag": 1.0, "step": 186 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/max_terminated_length": 401.0, "completions/mean_length": 119.55078125, "completions/mean_terminated_length": 119.55078125, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.5984, "grad_norm": 0.2931201457977295, "learning_rate": 4.519572953736655e-07, "loss": 0.0022, "num_tokens": 132127159.0, "reward": 0.94921875, "reward_std": 0.1420920491218567, "rewards/accuracy_reward_conf_tag": 0.44921875, "rewards/format_reward_conf_tag": 1.0, "step": 187 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/max_terminated_length": 313.0, "completions/mean_length": 117.220703125, "completions/mean_terminated_length": 117.220703125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.6016, "grad_norm": 0.31902194023132324, "learning_rate": 4.483985765124555e-07, "loss": 0.0057, "num_tokens": 132821896.0, "reward": 1.029296875, "reward_std": 0.10403060913085938, "rewards/accuracy_reward_conf_tag": 0.529296875, "rewards/format_reward_conf_tag": 1.0, "step": 188 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 577.0, "completions/max_terminated_length": 577.0, "completions/mean_length": 115.369140625, "completions/mean_terminated_length": 115.59490966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.6048, "grad_norm": 0.27381059527397156, "learning_rate": 4.4483985765124553e-07, "loss": 0.0004, "num_tokens": 133510541.0, "reward": 1.080078125, "reward_std": 0.09759338200092316, "rewards/accuracy_reward_conf_tag": 0.58203125, "rewards/format_reward_conf_tag": 0.99609375, "step": 189 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/max_terminated_length": 430.0, "completions/mean_length": 118.484375, "completions/mean_terminated_length": 118.484375, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.608, "grad_norm": 0.2547455132007599, "learning_rate": 4.412811387900356e-07, "loss": -0.0002, "num_tokens": 134239669.0, "reward": 0.994140625, "reward_std": 0.09725197404623032, "rewards/accuracy_reward_conf_tag": 0.494140625, "rewards/format_reward_conf_tag": 1.0, "step": 190 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/max_terminated_length": 665.0, "completions/mean_length": 123.349609375, "completions/mean_terminated_length": 123.349609375, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.6112, "grad_norm": 0.3871206045150757, "learning_rate": 4.377224199288256e-07, "loss": -0.0025, "num_tokens": 134972328.0, "reward": 1.0380859375, "reward_std": 0.11173088103532791, "rewards/accuracy_reward_conf_tag": 0.5390625, "rewards/format_reward_conf_tag": 0.998046875, "step": 191 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 124.984375, "completions/mean_terminated_length": 124.984375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.6144, "grad_norm": 0.3271653950214386, "learning_rate": 4.341637010676156e-07, "loss": 0.0061, "num_tokens": 135704184.0, "reward": 1.013671875, "reward_std": 0.10304145514965057, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 1.0, "step": 192 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 498.0, "completions/max_terminated_length": 498.0, "completions/mean_length": 115.599609375, "completions/mean_terminated_length": 115.8258285522461, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.6176, "grad_norm": 0.24646882712841034, "learning_rate": 4.306049822064057e-07, "loss": -0.0012, "num_tokens": 136416035.0, "reward": 1.0517578125, "reward_std": 0.09889516979455948, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 0.998046875, "step": 193 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/max_terminated_length": 543.0, "completions/mean_length": 119.814453125, "completions/mean_terminated_length": 119.814453125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.6208, "grad_norm": 0.2984526753425598, "learning_rate": 4.2704626334519573e-07, "loss": 0.0048, "num_tokens": 137126172.0, "reward": 1.025390625, "reward_std": 0.10869672894477844, "rewards/accuracy_reward_conf_tag": 0.525390625, "rewards/format_reward_conf_tag": 1.0, "step": 194 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/max_terminated_length": 475.0, "completions/mean_length": 127.044921875, "completions/mean_terminated_length": 127.044921875, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.624, "grad_norm": 0.24040964245796204, "learning_rate": 4.2348754448398576e-07, "loss": -0.0021, "num_tokens": 137855971.0, "reward": 0.8984375, "reward_std": 0.09356936812400818, "rewards/accuracy_reward_conf_tag": 0.3984375, "rewards/format_reward_conf_tag": 1.0, "step": 195 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 512.0, "completions/max_terminated_length": 512.0, "completions/mean_length": 123.66796875, "completions/mean_terminated_length": 123.90998077392578, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.6272, "grad_norm": 0.26657259464263916, "learning_rate": 4.199288256227758e-07, "loss": 0.005, "num_tokens": 138587409.0, "reward": 1.1162109375, "reward_std": 0.1010020449757576, "rewards/accuracy_reward_conf_tag": 0.6171875, "rewards/format_reward_conf_tag": 0.998046875, "step": 196 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 124.146484375, "completions/mean_terminated_length": 124.146484375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.6304, "grad_norm": 0.3319939374923706, "learning_rate": 4.163701067615658e-07, "loss": 0.0031, "num_tokens": 139315148.0, "reward": 1.00390625, "reward_std": 0.07259123027324677, "rewards/accuracy_reward_conf_tag": 0.50390625, "rewards/format_reward_conf_tag": 1.0, "step": 197 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 563.0, "completions/max_terminated_length": 563.0, "completions/mean_length": 125.134765625, "completions/mean_terminated_length": 125.134765625, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.6336, "grad_norm": 0.23367168009281158, "learning_rate": 4.1281138790035585e-07, "loss": 0.0038, "num_tokens": 140026385.0, "reward": 1.1259765625, "reward_std": 0.08870036154985428, "rewards/accuracy_reward_conf_tag": 0.626953125, "rewards/format_reward_conf_tag": 0.998046875, "step": 198 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 126.7265625, "completions/mean_terminated_length": 126.7265625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.6368, "grad_norm": 0.24098913371562958, "learning_rate": 4.0925266903914593e-07, "loss": -0.0017, "num_tokens": 140748997.0, "reward": 0.927734375, "reward_std": 0.0949535220861435, "rewards/accuracy_reward_conf_tag": 0.427734375, "rewards/format_reward_conf_tag": 1.0, "step": 199 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 717.0, "completions/max_terminated_length": 717.0, "completions/mean_length": 129.03515625, "completions/mean_terminated_length": 129.03515625, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.64, "grad_norm": 0.1898925006389618, "learning_rate": 4.0569395017793596e-07, "loss": -0.0028, "num_tokens": 141481199.0, "reward": 1.0615234375, "reward_std": 0.062272798269987106, "rewards/accuracy_reward_conf_tag": 0.5625, "rewards/format_reward_conf_tag": 0.998046875, "step": 200 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.0, "completions/max_terminated_length": 337.0, "completions/mean_length": 120.771484375, "completions/mean_terminated_length": 120.771484375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.6432, "grad_norm": 0.2958911061286926, "learning_rate": 4.0213523131672593e-07, "loss": 0.0044, "num_tokens": 142180746.0, "reward": 1.01953125, "reward_std": 0.11316017061471939, "rewards/accuracy_reward_conf_tag": 0.51953125, "rewards/format_reward_conf_tag": 1.0, "step": 201 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 388.0, "completions/max_terminated_length": 388.0, "completions/mean_length": 122.888671875, "completions/mean_terminated_length": 122.888671875, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.6464, "grad_norm": 0.20609861612319946, "learning_rate": 3.98576512455516e-07, "loss": 0.0048, "num_tokens": 142876849.0, "reward": 1.021484375, "reward_std": 0.07187168300151825, "rewards/accuracy_reward_conf_tag": 0.521484375, "rewards/format_reward_conf_tag": 1.0, "step": 202 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/max_terminated_length": 414.0, "completions/mean_length": 124.310546875, "completions/mean_terminated_length": 124.310546875, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.6496, "grad_norm": 0.28507986664772034, "learning_rate": 3.9501779359430604e-07, "loss": 0.0035, "num_tokens": 143564056.0, "reward": 1.080078125, "reward_std": 0.11849214136600494, "rewards/accuracy_reward_conf_tag": 0.580078125, "rewards/format_reward_conf_tag": 1.0, "step": 203 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 662.0, "completions/max_terminated_length": 662.0, "completions/mean_length": 138.58203125, "completions/mean_terminated_length": 138.58203125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.6528, "grad_norm": 0.216828852891922, "learning_rate": 3.9145907473309607e-07, "loss": 0.0001, "num_tokens": 144275274.0, "reward": 0.87890625, "reward_std": 0.07358038425445557, "rewards/accuracy_reward_conf_tag": 0.37890625, "rewards/format_reward_conf_tag": 1.0, "step": 204 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/max_terminated_length": 362.0, "completions/mean_length": 126.18359375, "completions/mean_terminated_length": 126.18359375, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.656, "grad_norm": 0.22173930704593658, "learning_rate": 3.879003558718861e-07, "loss": 0.0017, "num_tokens": 144986928.0, "reward": 1.015625, "reward_std": 0.06450574845075607, "rewards/accuracy_reward_conf_tag": 0.515625, "rewards/format_reward_conf_tag": 1.0, "step": 205 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 788.0, "completions/max_terminated_length": 788.0, "completions/mean_length": 127.94140625, "completions/mean_terminated_length": 127.94140625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.6592, "grad_norm": 0.2896319329738617, "learning_rate": 3.8434163701067613e-07, "loss": 0.0021, "num_tokens": 145675522.0, "reward": 1.037109375, "reward_std": 0.09876909852027893, "rewards/accuracy_reward_conf_tag": 0.537109375, "rewards/format_reward_conf_tag": 1.0, "step": 206 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/max_terminated_length": 377.0, "completions/mean_length": 117.310546875, "completions/mean_terminated_length": 117.310546875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.6624, "grad_norm": 0.2767243981361389, "learning_rate": 3.8078291814946616e-07, "loss": -0.0037, "num_tokens": 146384049.0, "reward": 1.064453125, "reward_std": 0.10981568694114685, "rewards/accuracy_reward_conf_tag": 0.564453125, "rewards/format_reward_conf_tag": 1.0, "step": 207 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 861.0, "completions/max_terminated_length": 861.0, "completions/mean_length": 120.791015625, "completions/mean_terminated_length": 120.791015625, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.6656, "grad_norm": 0.25350600481033325, "learning_rate": 3.7722419928825624e-07, "loss": 0.0069, "num_tokens": 147092478.0, "reward": 1.076171875, "reward_std": 0.08705839514732361, "rewards/accuracy_reward_conf_tag": 0.576171875, "rewards/format_reward_conf_tag": 1.0, "step": 208 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 368.0, "completions/max_terminated_length": 368.0, "completions/mean_length": 125.974609375, "completions/mean_terminated_length": 125.974609375, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.6688, "grad_norm": 0.23264235258102417, "learning_rate": 3.7366548042704627e-07, "loss": -0.0007, "num_tokens": 147802105.0, "reward": 0.98828125, "reward_std": 0.08193229138851166, "rewards/accuracy_reward_conf_tag": 0.48828125, "rewards/format_reward_conf_tag": 1.0, "step": 209 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/max_terminated_length": 473.0, "completions/mean_length": 130.1171875, "completions/mean_terminated_length": 130.1171875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.672, "grad_norm": 0.2470727115869522, "learning_rate": 3.7010676156583625e-07, "loss": -0.0015, "num_tokens": 148501501.0, "reward": 0.9091796875, "reward_std": 0.08568359166383743, "rewards/accuracy_reward_conf_tag": 0.41015625, "rewards/format_reward_conf_tag": 0.998046875, "step": 210 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/max_terminated_length": 503.0, "completions/mean_length": 127.25, "completions/mean_terminated_length": 127.25, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.6752, "grad_norm": 0.28654050827026367, "learning_rate": 3.6654804270462633e-07, "loss": 0.002, "num_tokens": 149191381.0, "reward": 1.0615234375, "reward_std": 0.11502020061016083, "rewards/accuracy_reward_conf_tag": 0.5625, "rewards/format_reward_conf_tag": 0.998046875, "step": 211 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 389.0, "completions/max_terminated_length": 389.0, "completions/mean_length": 130.220703125, "completions/mean_terminated_length": 130.4755401611328, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.6784, "grad_norm": 0.26739853620529175, "learning_rate": 3.6298932384341636e-07, "loss": 0.0009, "num_tokens": 149898590.0, "reward": 0.8984375, "reward_std": 0.1149473488330841, "rewards/accuracy_reward_conf_tag": 0.400390625, "rewards/format_reward_conf_tag": 0.99609375, "step": 212 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005859375, "completions/max_length": 475.0, "completions/max_terminated_length": 475.0, "completions/mean_length": 127.45703125, "completions/mean_terminated_length": 128.208251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.6816, "grad_norm": 0.2606695294380188, "learning_rate": 3.594306049822064e-07, "loss": -0.001, "num_tokens": 150606176.0, "reward": 1.0263671875, "reward_std": 0.10803714394569397, "rewards/accuracy_reward_conf_tag": 0.529296875, "rewards/format_reward_conf_tag": 0.994140625, "step": 213 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/max_terminated_length": 373.0, "completions/mean_length": 120.955078125, "completions/mean_terminated_length": 120.955078125, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.6848, "grad_norm": 0.26082226634025574, "learning_rate": 3.5587188612099647e-07, "loss": -0.0026, "num_tokens": 151324273.0, "reward": 1.05078125, "reward_std": 0.09935884922742844, "rewards/accuracy_reward_conf_tag": 0.55078125, "rewards/format_reward_conf_tag": 1.0, "step": 214 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 375.0, "completions/max_terminated_length": 375.0, "completions/mean_length": 123.8515625, "completions/mean_terminated_length": 123.8515625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.688, "grad_norm": 0.3981391191482544, "learning_rate": 3.5231316725978644e-07, "loss": -0.0033, "num_tokens": 152030045.0, "reward": 1.1025390625, "reward_std": 0.13599222898483276, "rewards/accuracy_reward_conf_tag": 0.603515625, "rewards/format_reward_conf_tag": 0.998046875, "step": 215 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 755.0, "completions/max_terminated_length": 755.0, "completions/mean_length": 126.810546875, "completions/mean_terminated_length": 126.810546875, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.6912, "grad_norm": 0.3804955780506134, "learning_rate": 3.4875444839857647e-07, "loss": 0.004, "num_tokens": 152735876.0, "reward": 0.982421875, "reward_std": 0.17557457089424133, "rewards/accuracy_reward_conf_tag": 0.482421875, "rewards/format_reward_conf_tag": 1.0, "step": 216 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 624.0, "completions/max_terminated_length": 624.0, "completions/mean_length": 124.138671875, "completions/mean_terminated_length": 124.138671875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.6944, "grad_norm": 0.24549926817417145, "learning_rate": 3.4519572953736656e-07, "loss": 0.0022, "num_tokens": 153428035.0, "reward": 1.001953125, "reward_std": 0.06338557600975037, "rewards/accuracy_reward_conf_tag": 0.501953125, "rewards/format_reward_conf_tag": 1.0, "step": 217 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/max_terminated_length": 374.0, "completions/mean_length": 123.3359375, "completions/mean_terminated_length": 123.3359375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.6976, "grad_norm": 0.3150384724140167, "learning_rate": 3.416370106761566e-07, "loss": -0.0008, "num_tokens": 154128247.0, "reward": 1.001953125, "reward_std": 0.11889031529426575, "rewards/accuracy_reward_conf_tag": 0.501953125, "rewards/format_reward_conf_tag": 1.0, "step": 218 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 426.0, "completions/max_terminated_length": 426.0, "completions/mean_length": 114.775390625, "completions/mean_terminated_length": 115.0, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.7008, "grad_norm": 0.2552780508995056, "learning_rate": 3.380782918149466e-07, "loss": 0.0039, "num_tokens": 154825932.0, "reward": 1.0927734375, "reward_std": 0.10930263996124268, "rewards/accuracy_reward_conf_tag": 0.59375, "rewards/format_reward_conf_tag": 0.998046875, "step": 219 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 677.0, "completions/max_terminated_length": 677.0, "completions/mean_length": 125.26953125, "completions/mean_terminated_length": 125.26953125, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.704, "grad_norm": 0.22730842232704163, "learning_rate": 3.3451957295373664e-07, "loss": 0.0065, "num_tokens": 155528846.0, "reward": 1.0859375, "reward_std": 0.09271685779094696, "rewards/accuracy_reward_conf_tag": 0.5859375, "rewards/format_reward_conf_tag": 1.0, "step": 220 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 624.0, "completions/max_terminated_length": 624.0, "completions/mean_length": 126.076171875, "completions/mean_terminated_length": 126.076171875, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.7072, "grad_norm": 0.2676522433757782, "learning_rate": 3.3096085409252667e-07, "loss": 0.0046, "num_tokens": 156256997.0, "reward": 1.025390625, "reward_std": 0.11579746752977371, "rewards/accuracy_reward_conf_tag": 0.525390625, "rewards/format_reward_conf_tag": 1.0, "step": 221 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/max_terminated_length": 311.0, "completions/mean_length": 125.025390625, "completions/mean_terminated_length": 125.025390625, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.7104, "grad_norm": 0.33278122544288635, "learning_rate": 3.274021352313167e-07, "loss": 0.0008, "num_tokens": 156978754.0, "reward": 1.01171875, "reward_std": 0.1259922832250595, "rewards/accuracy_reward_conf_tag": 0.51171875, "rewards/format_reward_conf_tag": 1.0, "step": 222 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 126.0625, "completions/mean_terminated_length": 126.0625, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.7136, "grad_norm": 0.32390889525413513, "learning_rate": 3.238434163701068e-07, "loss": 0.0037, "num_tokens": 157693946.0, "reward": 1.087890625, "reward_std": 0.12954068183898926, "rewards/accuracy_reward_conf_tag": 0.587890625, "rewards/format_reward_conf_tag": 1.0, "step": 223 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 121.4765625, "completions/mean_terminated_length": 121.71428680419922, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.7168, "grad_norm": 0.32255902886390686, "learning_rate": 3.202846975088968e-07, "loss": 0.0025, "num_tokens": 158392110.0, "reward": 1.0849609375, "reward_std": 0.13625742495059967, "rewards/accuracy_reward_conf_tag": 0.5859375, "rewards/format_reward_conf_tag": 0.998046875, "step": 224 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/max_terminated_length": 414.0, "completions/mean_length": 128.439453125, "completions/mean_terminated_length": 128.439453125, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.72, "grad_norm": 0.3401506841182709, "learning_rate": 3.167259786476868e-07, "loss": 0.0021, "num_tokens": 159070015.0, "reward": 1.00390625, "reward_std": 0.14841194450855255, "rewards/accuracy_reward_conf_tag": 0.50390625, "rewards/format_reward_conf_tag": 1.0, "step": 225 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 724.0, "completions/max_terminated_length": 724.0, "completions/mean_length": 124.62109375, "completions/mean_terminated_length": 125.10980987548828, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.7232, "grad_norm": 0.21677790582180023, "learning_rate": 3.1316725978647687e-07, "loss": 0.0009, "num_tokens": 159786877.0, "reward": 1.056640625, "reward_std": 0.08923016488552094, "rewards/accuracy_reward_conf_tag": 0.55859375, "rewards/format_reward_conf_tag": 0.99609375, "step": 226 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/max_terminated_length": 433.0, "completions/mean_length": 121.728515625, "completions/mean_terminated_length": 121.728515625, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.7264, "grad_norm": 0.29960089921951294, "learning_rate": 3.096085409252669e-07, "loss": 0.0044, "num_tokens": 160472538.0, "reward": 1.10546875, "reward_std": 0.09370160102844238, "rewards/accuracy_reward_conf_tag": 0.60546875, "rewards/format_reward_conf_tag": 1.0, "step": 227 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/max_terminated_length": 387.0, "completions/mean_length": 122.484375, "completions/mean_terminated_length": 122.484375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.7296, "grad_norm": 0.25875020027160645, "learning_rate": 3.0604982206405693e-07, "loss": 0.0031, "num_tokens": 161170386.0, "reward": 1.052734375, "reward_std": 0.12433972954750061, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 1.0, "step": 228 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/max_terminated_length": 503.0, "completions/mean_length": 121.158203125, "completions/mean_terminated_length": 121.158203125, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.7328, "grad_norm": 0.2586037814617157, "learning_rate": 3.02491103202847e-07, "loss": 0.0045, "num_tokens": 161868827.0, "reward": 1.013671875, "reward_std": 0.11691886186599731, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 1.0, "step": 229 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/max_terminated_length": 276.0, "completions/mean_length": 115.7421875, "completions/mean_terminated_length": 115.7421875, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.736, "grad_norm": 0.2556305229663849, "learning_rate": 2.98932384341637e-07, "loss": 0.0017, "num_tokens": 162582639.0, "reward": 1.076171875, "reward_std": 0.07897046208381653, "rewards/accuracy_reward_conf_tag": 0.576171875, "rewards/format_reward_conf_tag": 1.0, "step": 230 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/max_terminated_length": 359.0, "completions/mean_length": 121.33984375, "completions/mean_terminated_length": 121.33984375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.7392, "grad_norm": 0.2588680386543274, "learning_rate": 2.95373665480427e-07, "loss": 0.0029, "num_tokens": 163315461.0, "reward": 1.05859375, "reward_std": 0.12796618044376373, "rewards/accuracy_reward_conf_tag": 0.55859375, "rewards/format_reward_conf_tag": 1.0, "step": 231 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.0, "completions/max_terminated_length": 292.0, "completions/mean_length": 112.7734375, "completions/mean_terminated_length": 112.7734375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.7424, "grad_norm": 0.2550903260707855, "learning_rate": 2.918149466192171e-07, "loss": 0.0006, "num_tokens": 164033505.0, "reward": 1.08984375, "reward_std": 0.09271685779094696, "rewards/accuracy_reward_conf_tag": 0.58984375, "rewards/format_reward_conf_tag": 1.0, "step": 232 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/max_terminated_length": 386.0, "completions/mean_length": 128.5234375, "completions/mean_terminated_length": 128.5234375, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.7456, "grad_norm": 0.296998530626297, "learning_rate": 2.882562277580071e-07, "loss": -0.0, "num_tokens": 164768973.0, "reward": 1.0546875, "reward_std": 0.12999820709228516, "rewards/accuracy_reward_conf_tag": 0.5546875, "rewards/format_reward_conf_tag": 1.0, "step": 233 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/max_terminated_length": 497.0, "completions/mean_length": 128.359375, "completions/mean_terminated_length": 128.359375, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.7488, "grad_norm": 0.17279453575611115, "learning_rate": 2.8469750889679715e-07, "loss": -0.0005, "num_tokens": 165477557.0, "reward": 0.98046875, "reward_std": 0.06378497928380966, "rewards/accuracy_reward_conf_tag": 0.48046875, "rewards/format_reward_conf_tag": 1.0, "step": 234 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 383.0, "completions/max_terminated_length": 383.0, "completions/mean_length": 123.052734375, "completions/mean_terminated_length": 123.052734375, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.752, "grad_norm": 0.1877635419368744, "learning_rate": 2.811387900355872e-07, "loss": 0.0019, "num_tokens": 166171512.0, "reward": 0.994140625, "reward_std": 0.06207628548145294, "rewards/accuracy_reward_conf_tag": 0.494140625, "rewards/format_reward_conf_tag": 1.0, "step": 235 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 123.8984375, "completions/mean_terminated_length": 123.8984375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.7552, "grad_norm": 0.23798514902591705, "learning_rate": 2.775800711743772e-07, "loss": 0.0027, "num_tokens": 166868556.0, "reward": 0.974609375, "reward_std": 0.09468954056501389, "rewards/accuracy_reward_conf_tag": 0.474609375, "rewards/format_reward_conf_tag": 1.0, "step": 236 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/max_terminated_length": 386.0, "completions/mean_length": 125.880859375, "completions/mean_terminated_length": 125.880859375, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.7584, "grad_norm": 0.2679816782474518, "learning_rate": 2.7402135231316724e-07, "loss": 0.0003, "num_tokens": 167572671.0, "reward": 0.9287109375, "reward_std": 0.12495569884777069, "rewards/accuracy_reward_conf_tag": 0.431640625, "rewards/format_reward_conf_tag": 0.994140625, "step": 237 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 130.19140625, "completions/mean_terminated_length": 130.19140625, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.7616, "grad_norm": 0.24259266257286072, "learning_rate": 2.704626334519573e-07, "loss": 0.0006, "num_tokens": 168286737.0, "reward": 1.064453125, "reward_std": 0.10928526520729065, "rewards/accuracy_reward_conf_tag": 0.564453125, "rewards/format_reward_conf_tag": 1.0, "step": 238 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/max_terminated_length": 453.0, "completions/mean_length": 122.994140625, "completions/mean_terminated_length": 122.994140625, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.7648, "grad_norm": 0.2749550938606262, "learning_rate": 2.669039145907473e-07, "loss": 0.0019, "num_tokens": 168984782.0, "reward": 1.0888671875, "reward_std": 0.08910098671913147, "rewards/accuracy_reward_conf_tag": 0.58984375, "rewards/format_reward_conf_tag": 0.998046875, "step": 239 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/max_terminated_length": 351.0, "completions/mean_length": 117.353515625, "completions/mean_terminated_length": 117.353515625, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.768, "grad_norm": 0.3518067002296448, "learning_rate": 2.6334519572953733e-07, "loss": 0.002, "num_tokens": 169689987.0, "reward": 0.9658203125, "reward_std": 0.09588323533535004, "rewards/accuracy_reward_conf_tag": 0.466796875, "rewards/format_reward_conf_tag": 0.998046875, "step": 240 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 126.912109375, "completions/mean_terminated_length": 126.912109375, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.7712, "grad_norm": 0.2472555935382843, "learning_rate": 2.597864768683274e-07, "loss": -0.0008, "num_tokens": 170403422.0, "reward": 1.0712890625, "reward_std": 0.08325216919183731, "rewards/accuracy_reward_conf_tag": 0.572265625, "rewards/format_reward_conf_tag": 0.998046875, "step": 241 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 123.63671875, "completions/mean_terminated_length": 123.63671875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.7744, "grad_norm": 0.25808462500572205, "learning_rate": 2.5622775800711744e-07, "loss": 0.0017, "num_tokens": 171111804.0, "reward": 1.0966796875, "reward_std": 0.12197823077440262, "rewards/accuracy_reward_conf_tag": 0.59765625, "rewards/format_reward_conf_tag": 0.998046875, "step": 242 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.0, "completions/max_terminated_length": 336.0, "completions/mean_length": 121.53515625, "completions/mean_terminated_length": 121.53515625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.7776, "grad_norm": 0.3100128769874573, "learning_rate": 2.5266903914590747e-07, "loss": 0.0053, "num_tokens": 171818726.0, "reward": 1.041015625, "reward_std": 0.14216691255569458, "rewards/accuracy_reward_conf_tag": 0.541015625, "rewards/format_reward_conf_tag": 1.0, "step": 243 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/max_terminated_length": 417.0, "completions/mean_length": 126.056640625, "completions/mean_terminated_length": 126.056640625, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.7808, "grad_norm": 0.20280516147613525, "learning_rate": 2.491103202846975e-07, "loss": -0.0007, "num_tokens": 172547843.0, "reward": 1.09765625, "reward_std": 0.07384681701660156, "rewards/accuracy_reward_conf_tag": 0.59765625, "rewards/format_reward_conf_tag": 1.0, "step": 244 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/max_terminated_length": 395.0, "completions/mean_length": 128.76171875, "completions/mean_terminated_length": 128.76171875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.784, "grad_norm": 0.18640628457069397, "learning_rate": 2.455516014234875e-07, "loss": 0.0004, "num_tokens": 173272233.0, "reward": 0.923828125, "reward_std": 0.050305016338825226, "rewards/accuracy_reward_conf_tag": 0.423828125, "rewards/format_reward_conf_tag": 1.0, "step": 245 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/max_terminated_length": 472.0, "completions/mean_length": 124.55078125, "completions/mean_terminated_length": 124.55078125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.7872, "grad_norm": 0.305706262588501, "learning_rate": 2.4199288256227755e-07, "loss": 0.0, "num_tokens": 173960371.0, "reward": 1.037109375, "reward_std": 0.10021258145570755, "rewards/accuracy_reward_conf_tag": 0.537109375, "rewards/format_reward_conf_tag": 1.0, "step": 246 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 513.0, "completions/max_terminated_length": 513.0, "completions/mean_length": 123.5390625, "completions/mean_terminated_length": 123.5390625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.7904, "grad_norm": 0.20943109691143036, "learning_rate": 2.3843416370106764e-07, "loss": -0.0013, "num_tokens": 174670167.0, "reward": 1.087890625, "reward_std": 0.06983967125415802, "rewards/accuracy_reward_conf_tag": 0.587890625, "rewards/format_reward_conf_tag": 1.0, "step": 247 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.0, "completions/max_terminated_length": 590.0, "completions/mean_length": 125.927734375, "completions/mean_terminated_length": 125.927734375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.7936, "grad_norm": 0.20170189440250397, "learning_rate": 2.3487544483985764e-07, "loss": 0.0037, "num_tokens": 175383778.0, "reward": 1.09765625, "reward_std": 0.08179810643196106, "rewards/accuracy_reward_conf_tag": 0.59765625, "rewards/format_reward_conf_tag": 1.0, "step": 248 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 718.0, "completions/max_terminated_length": 718.0, "completions/mean_length": 127.423828125, "completions/mean_terminated_length": 127.423828125, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.7968, "grad_norm": 0.2695271372795105, "learning_rate": 2.313167259786477e-07, "loss": -0.0015, "num_tokens": 176095803.0, "reward": 1.080078125, "reward_std": 0.10192251205444336, "rewards/accuracy_reward_conf_tag": 0.580078125, "rewards/format_reward_conf_tag": 1.0, "step": 249 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/max_terminated_length": 325.0, "completions/mean_length": 129.509765625, "completions/mean_terminated_length": 129.509765625, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.8, "grad_norm": 0.27595341205596924, "learning_rate": 2.277580071174377e-07, "loss": 0.0013, "num_tokens": 176815544.0, "reward": 1.095703125, "reward_std": 0.09738617390394211, "rewards/accuracy_reward_conf_tag": 0.595703125, "rewards/format_reward_conf_tag": 1.0, "step": 250 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 341.0, "completions/max_terminated_length": 341.0, "completions/mean_length": 119.283203125, "completions/mean_terminated_length": 119.51663208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.8032, "grad_norm": 0.28123873472213745, "learning_rate": 2.2419928825622775e-07, "loss": -0.0004, "num_tokens": 177533113.0, "reward": 1.0654296875, "reward_std": 0.11685336381196976, "rewards/accuracy_reward_conf_tag": 0.56640625, "rewards/format_reward_conf_tag": 0.998046875, "step": 251 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.0, "completions/max_terminated_length": 280.0, "completions/mean_length": 121.328125, "completions/mean_terminated_length": 121.328125, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.8064, "grad_norm": 0.21899612247943878, "learning_rate": 2.206405693950178e-07, "loss": 0.0024, "num_tokens": 178229321.0, "reward": 1.06640625, "reward_std": 0.0963982343673706, "rewards/accuracy_reward_conf_tag": 0.56640625, "rewards/format_reward_conf_tag": 1.0, "step": 252 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/max_terminated_length": 394.0, "completions/mean_length": 130.419921875, "completions/mean_terminated_length": 130.419921875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.8096, "grad_norm": 0.271857887506485, "learning_rate": 2.170818505338078e-07, "loss": 0.004, "num_tokens": 178942944.0, "reward": 1.0498046875, "reward_std": 0.11213028430938721, "rewards/accuracy_reward_conf_tag": 0.55078125, "rewards/format_reward_conf_tag": 0.998046875, "step": 253 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 686.0, "completions/max_terminated_length": 686.0, "completions/mean_length": 129.70703125, "completions/mean_terminated_length": 129.70703125, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.8128, "grad_norm": 0.2682909369468689, "learning_rate": 2.1352313167259786e-07, "loss": 0.0043, "num_tokens": 179658858.0, "reward": 0.9814453125, "reward_std": 0.13150840997695923, "rewards/accuracy_reward_conf_tag": 0.482421875, "rewards/format_reward_conf_tag": 0.998046875, "step": 254 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.0, "completions/max_terminated_length": 331.0, "completions/mean_length": 125.44140625, "completions/mean_terminated_length": 125.44140625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.816, "grad_norm": 0.3045589327812195, "learning_rate": 2.099644128113879e-07, "loss": -0.0032, "num_tokens": 180378580.0, "reward": 1.0703125, "reward_std": 0.14795516431331635, "rewards/accuracy_reward_conf_tag": 0.5703125, "rewards/format_reward_conf_tag": 1.0, "step": 255 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/max_terminated_length": 423.0, "completions/mean_length": 125.42578125, "completions/mean_terminated_length": 125.42578125, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.8192, "grad_norm": 0.29352763295173645, "learning_rate": 2.0640569395017792e-07, "loss": -0.0021, "num_tokens": 181081414.0, "reward": 1.09375, "reward_std": 0.12033502757549286, "rewards/accuracy_reward_conf_tag": 0.59375, "rewards/format_reward_conf_tag": 1.0, "step": 256 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 132.103515625, "completions/mean_terminated_length": 132.36203002929688, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.8224, "grad_norm": 0.20613841712474823, "learning_rate": 2.0284697508896798e-07, "loss": 0.0, "num_tokens": 181804179.0, "reward": 0.9306640625, "reward_std": 0.08568236231803894, "rewards/accuracy_reward_conf_tag": 0.43359375, "rewards/format_reward_conf_tag": 0.994140625, "step": 257 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/max_terminated_length": 370.0, "completions/mean_length": 124.921875, "completions/mean_terminated_length": 124.921875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.8256, "grad_norm": 0.24263106286525726, "learning_rate": 1.99288256227758e-07, "loss": -0.0019, "num_tokens": 182538403.0, "reward": 1.0625, "reward_std": 0.08844450116157532, "rewards/accuracy_reward_conf_tag": 0.5625, "rewards/format_reward_conf_tag": 1.0, "step": 258 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 363.0, "completions/max_terminated_length": 363.0, "completions/mean_length": 131.3359375, "completions/mean_terminated_length": 131.3359375, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.8288, "grad_norm": 0.28023406863212585, "learning_rate": 1.9572953736654804e-07, "loss": 0.0016, "num_tokens": 183238783.0, "reward": 1.005859375, "reward_std": 0.12033576518297195, "rewards/accuracy_reward_conf_tag": 0.505859375, "rewards/format_reward_conf_tag": 1.0, "step": 259 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/max_terminated_length": 347.0, "completions/mean_length": 130.841796875, "completions/mean_terminated_length": 130.841796875, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.832, "grad_norm": 0.23595190048217773, "learning_rate": 1.9217081850533807e-07, "loss": -0.0023, "num_tokens": 183928390.0, "reward": 1.0, "reward_std": 0.07897168397903442, "rewards/accuracy_reward_conf_tag": 0.5, "rewards/format_reward_conf_tag": 1.0, "step": 260 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 132.7421875, "completions/mean_terminated_length": 132.7421875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.8352, "grad_norm": 0.25339820981025696, "learning_rate": 1.8861209964412812e-07, "loss": 0.0073, "num_tokens": 184639042.0, "reward": 1.03515625, "reward_std": 0.0850832611322403, "rewards/accuracy_reward_conf_tag": 0.53515625, "rewards/format_reward_conf_tag": 1.0, "step": 261 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 126.7734375, "completions/mean_terminated_length": 126.7734375, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.8384, "grad_norm": 0.26170578598976135, "learning_rate": 1.8505338078291812e-07, "loss": 0.0058, "num_tokens": 185352278.0, "reward": 1.009765625, "reward_std": 0.11980339139699936, "rewards/accuracy_reward_conf_tag": 0.509765625, "rewards/format_reward_conf_tag": 1.0, "step": 262 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 388.0, "completions/max_terminated_length": 388.0, "completions/mean_length": 128.8515625, "completions/mean_terminated_length": 129.1037139892578, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.8416, "grad_norm": 0.3050854206085205, "learning_rate": 1.8149466192170818e-07, "loss": -0.0018, "num_tokens": 186054970.0, "reward": 1.0478515625, "reward_std": 0.1286795437335968, "rewards/accuracy_reward_conf_tag": 0.548828125, "rewards/format_reward_conf_tag": 0.998046875, "step": 263 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/max_terminated_length": 510.0, "completions/mean_length": 135.689453125, "completions/mean_terminated_length": 135.689453125, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.8448, "grad_norm": 0.2786458730697632, "learning_rate": 1.7793594306049823e-07, "loss": 0.0002, "num_tokens": 186763307.0, "reward": 0.9853515625, "reward_std": 0.12059161812067032, "rewards/accuracy_reward_conf_tag": 0.486328125, "rewards/format_reward_conf_tag": 0.998046875, "step": 264 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 855.0, "completions/max_terminated_length": 855.0, "completions/mean_length": 132.556640625, "completions/mean_terminated_length": 132.556640625, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.848, "grad_norm": 0.2970069348812103, "learning_rate": 1.7437722419928824e-07, "loss": -0.0031, "num_tokens": 187485848.0, "reward": 1.037109375, "reward_std": 0.08956026285886765, "rewards/accuracy_reward_conf_tag": 0.537109375, "rewards/format_reward_conf_tag": 1.0, "step": 265 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 638.0, "completions/max_terminated_length": 638.0, "completions/mean_length": 128.80078125, "completions/mean_terminated_length": 128.80078125, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.8512, "grad_norm": 0.260507196187973, "learning_rate": 1.708185053380783e-07, "loss": 0.001, "num_tokens": 188195370.0, "reward": 1.009765625, "reward_std": 0.11849410086870193, "rewards/accuracy_reward_conf_tag": 0.509765625, "rewards/format_reward_conf_tag": 1.0, "step": 266 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/max_terminated_length": 532.0, "completions/mean_length": 132.08984375, "completions/mean_terminated_length": 132.08984375, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.8544, "grad_norm": 0.3098877966403961, "learning_rate": 1.6725978647686832e-07, "loss": 0.0018, "num_tokens": 188898424.0, "reward": 1.0029296875, "reward_std": 0.10540418326854706, "rewards/accuracy_reward_conf_tag": 0.50390625, "rewards/format_reward_conf_tag": 0.998046875, "step": 267 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 133.64453125, "completions/mean_terminated_length": 133.64453125, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.8576, "grad_norm": 0.8424309492111206, "learning_rate": 1.6370106761565835e-07, "loss": -0.0005, "num_tokens": 189603234.0, "reward": 1.05078125, "reward_std": 0.13835257291793823, "rewards/accuracy_reward_conf_tag": 0.55078125, "rewards/format_reward_conf_tag": 1.0, "step": 268 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/max_terminated_length": 665.0, "completions/mean_length": 131.474609375, "completions/mean_terminated_length": 131.474609375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.8608, "grad_norm": 0.20501279830932617, "learning_rate": 1.601423487544484e-07, "loss": 0.0007, "num_tokens": 190310901.0, "reward": 1.1640625, "reward_std": 0.08462892472743988, "rewards/accuracy_reward_conf_tag": 0.6640625, "rewards/format_reward_conf_tag": 1.0, "step": 269 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/max_terminated_length": 382.0, "completions/mean_length": 132.26171875, "completions/mean_terminated_length": 132.26171875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.864, "grad_norm": 0.2276669442653656, "learning_rate": 1.5658362989323843e-07, "loss": 0.0008, "num_tokens": 191037267.0, "reward": 1.12109375, "reward_std": 0.0862782895565033, "rewards/accuracy_reward_conf_tag": 0.62109375, "rewards/format_reward_conf_tag": 1.0, "step": 270 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.0, "completions/max_terminated_length": 426.0, "completions/mean_length": 140.5859375, "completions/mean_terminated_length": 140.5859375, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.8672, "grad_norm": 0.20950470864772797, "learning_rate": 1.5302491103202846e-07, "loss": 0.0059, "num_tokens": 191780447.0, "reward": 0.92578125, "reward_std": 0.09186190366744995, "rewards/accuracy_reward_conf_tag": 0.42578125, "rewards/format_reward_conf_tag": 1.0, "step": 271 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/max_terminated_length": 512.0, "completions/mean_length": 128.564453125, "completions/mean_terminated_length": 128.564453125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.8704, "grad_norm": 0.2311246246099472, "learning_rate": 1.494661921708185e-07, "loss": -0.0011, "num_tokens": 192514872.0, "reward": 1.0546875, "reward_std": 0.10061199218034744, "rewards/accuracy_reward_conf_tag": 0.5546875, "rewards/format_reward_conf_tag": 1.0, "step": 272 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/max_terminated_length": 406.0, "completions/mean_length": 137.228515625, "completions/mean_terminated_length": 137.228515625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.8736, "grad_norm": 0.2300596535205841, "learning_rate": 1.4590747330960855e-07, "loss": 0.0027, "num_tokens": 193238885.0, "reward": 0.984375, "reward_std": 0.095276840031147, "rewards/accuracy_reward_conf_tag": 0.484375, "rewards/format_reward_conf_tag": 1.0, "step": 273 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/max_terminated_length": 500.0, "completions/mean_length": 132.6796875, "completions/mean_terminated_length": 132.6796875, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.8768, "grad_norm": 0.25587886571884155, "learning_rate": 1.4234875444839858e-07, "loss": -0.0016, "num_tokens": 193957137.0, "reward": 0.986328125, "reward_std": 0.1152089387178421, "rewards/accuracy_reward_conf_tag": 0.486328125, "rewards/format_reward_conf_tag": 1.0, "step": 274 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 120.36328125, "completions/mean_terminated_length": 120.36328125, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.88, "grad_norm": 0.21815632283687592, "learning_rate": 1.387900355871886e-07, "loss": 0.0057, "num_tokens": 194629323.0, "reward": 1.060546875, "reward_std": 0.08364100009202957, "rewards/accuracy_reward_conf_tag": 0.560546875, "rewards/format_reward_conf_tag": 1.0, "step": 275 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 540.0, "completions/max_terminated_length": 540.0, "completions/mean_length": 130.677734375, "completions/mean_terminated_length": 130.677734375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.8832, "grad_norm": 0.20837943255901337, "learning_rate": 1.3523131672597866e-07, "loss": -0.002, "num_tokens": 195334222.0, "reward": 0.9306640625, "reward_std": 0.09304757416248322, "rewards/accuracy_reward_conf_tag": 0.431640625, "rewards/format_reward_conf_tag": 0.998046875, "step": 276 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 399.0, "completions/max_terminated_length": 399.0, "completions/mean_length": 137.671875, "completions/mean_terminated_length": 137.671875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.8864, "grad_norm": 0.2613351345062256, "learning_rate": 1.3167259786476866e-07, "loss": 0.0003, "num_tokens": 196065062.0, "reward": 1.08203125, "reward_std": 0.10106877237558365, "rewards/accuracy_reward_conf_tag": 0.58203125, "rewards/format_reward_conf_tag": 1.0, "step": 277 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/max_terminated_length": 407.0, "completions/mean_length": 137.93359375, "completions/mean_terminated_length": 137.93359375, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.8896, "grad_norm": 0.3071613013744354, "learning_rate": 1.2811387900355872e-07, "loss": 0.0031, "num_tokens": 196782428.0, "reward": 1.015625, "reward_std": 0.16346396505832672, "rewards/accuracy_reward_conf_tag": 0.515625, "rewards/format_reward_conf_tag": 1.0, "step": 278 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 607.0, "completions/max_terminated_length": 607.0, "completions/mean_length": 137.916015625, "completions/mean_terminated_length": 137.916015625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.8928, "grad_norm": 0.21012061834335327, "learning_rate": 1.2455516014234875e-07, "loss": 0.0021, "num_tokens": 197517377.0, "reward": 1.0166015625, "reward_std": 0.1000141054391861, "rewards/accuracy_reward_conf_tag": 0.517578125, "rewards/format_reward_conf_tag": 0.998046875, "step": 279 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/max_terminated_length": 524.0, "completions/mean_length": 135.2890625, "completions/mean_terminated_length": 135.2890625, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.896, "grad_norm": 0.2502855062484741, "learning_rate": 1.2099644128113878e-07, "loss": -0.0002, "num_tokens": 198222629.0, "reward": 1.017578125, "reward_std": 0.11737515777349472, "rewards/accuracy_reward_conf_tag": 0.517578125, "rewards/format_reward_conf_tag": 1.0, "step": 280 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 758.0, "completions/max_terminated_length": 758.0, "completions/mean_length": 134.283203125, "completions/mean_terminated_length": 134.283203125, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.8992, "grad_norm": 0.27686265110969543, "learning_rate": 1.1743772241992882e-07, "loss": -0.0033, "num_tokens": 198921430.0, "reward": 0.966796875, "reward_std": 0.11224833130836487, "rewards/accuracy_reward_conf_tag": 0.466796875, "rewards/format_reward_conf_tag": 1.0, "step": 281 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/max_terminated_length": 378.0, "completions/mean_length": 136.4140625, "completions/mean_terminated_length": 136.4140625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.9024, "grad_norm": 0.2457011491060257, "learning_rate": 1.1387900355871885e-07, "loss": 0.0056, "num_tokens": 199636650.0, "reward": 0.9921875, "reward_std": 0.11264772713184357, "rewards/accuracy_reward_conf_tag": 0.4921875, "rewards/format_reward_conf_tag": 1.0, "step": 282 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/max_terminated_length": 504.0, "completions/mean_length": 138.45703125, "completions/mean_terminated_length": 138.45703125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.9056, "grad_norm": 0.2855152189731598, "learning_rate": 1.103202846975089e-07, "loss": 0.0032, "num_tokens": 200351876.0, "reward": 1.1083984375, "reward_std": 0.15104307234287262, "rewards/accuracy_reward_conf_tag": 0.609375, "rewards/format_reward_conf_tag": 0.998046875, "step": 283 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 729.0, "completions/max_terminated_length": 729.0, "completions/mean_length": 143.796875, "completions/mean_terminated_length": 143.796875, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.9088, "grad_norm": 0.28002673387527466, "learning_rate": 1.0676156583629893e-07, "loss": 0.0082, "num_tokens": 201082116.0, "reward": 0.9921875, "reward_std": 0.151961088180542, "rewards/accuracy_reward_conf_tag": 0.4921875, "rewards/format_reward_conf_tag": 1.0, "step": 284 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.0, "completions/max_terminated_length": 337.0, "completions/mean_length": 130.859375, "completions/mean_terminated_length": 130.859375, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.912, "grad_norm": 0.22129687666893005, "learning_rate": 1.0320284697508896e-07, "loss": -0.0044, "num_tokens": 201803196.0, "reward": 0.9873046875, "reward_std": 0.10054770857095718, "rewards/accuracy_reward_conf_tag": 0.48828125, "rewards/format_reward_conf_tag": 0.998046875, "step": 285 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 138.8359375, "completions/mean_terminated_length": 138.8359375, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.9152, "grad_norm": 0.2536018490791321, "learning_rate": 9.9644128113879e-08, "loss": -0.0031, "num_tokens": 202523944.0, "reward": 0.994140625, "reward_std": 0.1057380810379982, "rewards/accuracy_reward_conf_tag": 0.494140625, "rewards/format_reward_conf_tag": 1.0, "step": 286 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.0, "completions/max_terminated_length": 570.0, "completions/mean_length": 139.84375, "completions/mean_terminated_length": 139.84375, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.9184, "grad_norm": 0.20366545021533966, "learning_rate": 9.608540925266903e-08, "loss": -0.0026, "num_tokens": 203252656.0, "reward": 1.080078125, "reward_std": 0.060823142528533936, "rewards/accuracy_reward_conf_tag": 0.580078125, "rewards/format_reward_conf_tag": 1.0, "step": 287 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/max_terminated_length": 532.0, "completions/mean_length": 137.982421875, "completions/mean_terminated_length": 137.982421875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.9216, "grad_norm": 0.2818716764450073, "learning_rate": 9.252669039145906e-08, "loss": 0.0007, "num_tokens": 203976887.0, "reward": 1.0224609375, "reward_std": 0.10212098807096481, "rewards/accuracy_reward_conf_tag": 0.5234375, "rewards/format_reward_conf_tag": 0.998046875, "step": 288 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/max_terminated_length": 409.0, "completions/mean_length": 139.19921875, "completions/mean_terminated_length": 139.19921875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.9248, "grad_norm": 0.24427424371242523, "learning_rate": 8.896797153024912e-08, "loss": 0.0003, "num_tokens": 204663757.0, "reward": 0.96875, "reward_std": 0.07878133654594421, "rewards/accuracy_reward_conf_tag": 0.46875, "rewards/format_reward_conf_tag": 1.0, "step": 289 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 573.0, "completions/max_terminated_length": 573.0, "completions/mean_length": 137.80859375, "completions/mean_terminated_length": 137.80859375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.928, "grad_norm": 0.2052888423204422, "learning_rate": 8.540925266903915e-08, "loss": 0.002, "num_tokens": 205382699.0, "reward": 1.029296875, "reward_std": 0.07995961606502533, "rewards/accuracy_reward_conf_tag": 0.529296875, "rewards/format_reward_conf_tag": 1.0, "step": 290 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 131.904296875, "completions/mean_terminated_length": 131.904296875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.9312, "grad_norm": 0.31603914499282837, "learning_rate": 8.185053380782917e-08, "loss": 0.0013, "num_tokens": 206098186.0, "reward": 0.93359375, "reward_std": 0.14637748897075653, "rewards/accuracy_reward_conf_tag": 0.43359375, "rewards/format_reward_conf_tag": 1.0, "step": 291 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.0, "completions/max_terminated_length": 355.0, "completions/mean_length": 137.095703125, "completions/mean_terminated_length": 137.095703125, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.9344, "grad_norm": 0.24164921045303345, "learning_rate": 7.829181494661922e-08, "loss": 0.0022, "num_tokens": 206839555.0, "reward": 1.033203125, "reward_std": 0.09001900255680084, "rewards/accuracy_reward_conf_tag": 0.533203125, "rewards/format_reward_conf_tag": 1.0, "step": 292 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/max_terminated_length": 461.0, "completions/mean_length": 135.4296875, "completions/mean_terminated_length": 135.4296875, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.9376, "grad_norm": 0.23846471309661865, "learning_rate": 7.473309608540925e-08, "loss": 0.0, "num_tokens": 207519159.0, "reward": 0.9951171875, "reward_std": 0.09475381672382355, "rewards/accuracy_reward_conf_tag": 0.49609375, "rewards/format_reward_conf_tag": 0.998046875, "step": 293 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 142.353515625, "completions/mean_terminated_length": 142.353515625, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.9408, "grad_norm": 0.27026620507240295, "learning_rate": 7.117437722419929e-08, "loss": 0.0068, "num_tokens": 208250244.0, "reward": 1.068359375, "reward_std": 0.16071240603923798, "rewards/accuracy_reward_conf_tag": 0.568359375, "rewards/format_reward_conf_tag": 1.0, "step": 294 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 576.0, "completions/max_terminated_length": 576.0, "completions/mean_length": 135.0390625, "completions/mean_terminated_length": 135.0390625, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.944, "grad_norm": 0.3178434669971466, "learning_rate": 6.761565836298933e-08, "loss": -0.0002, "num_tokens": 208943288.0, "reward": 1.068359375, "reward_std": 0.11987947672605515, "rewards/accuracy_reward_conf_tag": 0.568359375, "rewards/format_reward_conf_tag": 1.0, "step": 295 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/max_terminated_length": 464.0, "completions/mean_length": 142.169921875, "completions/mean_terminated_length": 142.169921875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.9472, "grad_norm": 0.2595904469490051, "learning_rate": 6.405693950177936e-08, "loss": -0.0022, "num_tokens": 209643231.0, "reward": 1.0078125, "reward_std": 0.07411079853773117, "rewards/accuracy_reward_conf_tag": 0.5078125, "rewards/format_reward_conf_tag": 1.0, "step": 296 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/max_terminated_length": 434.0, "completions/mean_length": 132.931640625, "completions/mean_terminated_length": 132.931640625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.9504, "grad_norm": 0.33172011375427246, "learning_rate": 6.049822064056939e-08, "loss": -0.0007, "num_tokens": 210365900.0, "reward": 1.041015625, "reward_std": 0.11961549520492554, "rewards/accuracy_reward_conf_tag": 0.541015625, "rewards/format_reward_conf_tag": 1.0, "step": 297 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/max_terminated_length": 465.0, "completions/mean_length": 138.744140625, "completions/mean_terminated_length": 138.744140625, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.9536, "grad_norm": 0.43559926748275757, "learning_rate": 5.6939501779359424e-08, "loss": 0.0, "num_tokens": 211094905.0, "reward": 1.033203125, "reward_std": 0.11225028336048126, "rewards/accuracy_reward_conf_tag": 0.533203125, "rewards/format_reward_conf_tag": 1.0, "step": 298 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/max_terminated_length": 377.0, "completions/mean_length": 141.1171875, "completions/mean_terminated_length": 141.1171875, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.9568, "grad_norm": 0.2028070092201233, "learning_rate": 5.3380782918149466e-08, "loss": -0.0035, "num_tokens": 211801477.0, "reward": 1.021484375, "reward_std": 0.06832009553909302, "rewards/accuracy_reward_conf_tag": 0.521484375, "rewards/format_reward_conf_tag": 1.0, "step": 299 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/max_terminated_length": 518.0, "completions/mean_length": 141.8984375, "completions/mean_terminated_length": 141.8984375, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.96, "grad_norm": 0.21485182642936707, "learning_rate": 4.98220640569395e-08, "loss": -0.0005, "num_tokens": 212502841.0, "reward": 1.001953125, "reward_std": 0.07397978752851486, "rewards/accuracy_reward_conf_tag": 0.501953125, "rewards/format_reward_conf_tag": 1.0, "step": 300 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 144.912109375, "completions/mean_terminated_length": 144.912109375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.9632, "grad_norm": 0.26152896881103516, "learning_rate": 4.626334519572953e-08, "loss": 0.0046, "num_tokens": 213212836.0, "reward": 0.978515625, "reward_std": 0.13164877891540527, "rewards/accuracy_reward_conf_tag": 0.478515625, "rewards/format_reward_conf_tag": 1.0, "step": 301 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 130.357421875, "completions/mean_terminated_length": 130.357421875, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.9664, "grad_norm": 0.24077603220939636, "learning_rate": 4.270462633451957e-08, "loss": 0.0002, "num_tokens": 213912603.0, "reward": 1.0341796875, "reward_std": 0.08963698148727417, "rewards/accuracy_reward_conf_tag": 0.53515625, "rewards/format_reward_conf_tag": 0.998046875, "step": 302 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1101.0, "completions/max_terminated_length": 1101.0, "completions/mean_length": 142.625, "completions/mean_terminated_length": 142.625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.9696, "grad_norm": 0.2666417062282562, "learning_rate": 3.914590747330961e-08, "loss": -0.0014, "num_tokens": 214628307.0, "reward": 1.056640625, "reward_std": 0.13808491826057434, "rewards/accuracy_reward_conf_tag": 0.556640625, "rewards/format_reward_conf_tag": 1.0, "step": 303 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 143.8515625, "completions/mean_terminated_length": 144.13307189941406, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.9728, "grad_norm": 0.26070520281791687, "learning_rate": 3.5587188612099644e-08, "loss": -0.0014, "num_tokens": 215345047.0, "reward": 1.0087890625, "reward_std": 0.07727016508579254, "rewards/accuracy_reward_conf_tag": 0.51171875, "rewards/format_reward_conf_tag": 0.994140625, "step": 304 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 139.166015625, "completions/mean_terminated_length": 139.4383544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.976, "grad_norm": 0.30899903178215027, "learning_rate": 3.202846975088968e-08, "loss": 0.0039, "num_tokens": 216044764.0, "reward": 1.0908203125, "reward_std": 0.14591380953788757, "rewards/accuracy_reward_conf_tag": 0.591796875, "rewards/format_reward_conf_tag": 0.998046875, "step": 305 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 131.318359375, "completions/mean_terminated_length": 131.318359375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.9792, "grad_norm": 0.37437206506729126, "learning_rate": 2.8469750889679712e-08, "loss": -0.0001, "num_tokens": 216762751.0, "reward": 0.95703125, "reward_std": 0.11014340817928314, "rewards/accuracy_reward_conf_tag": 0.45703125, "rewards/format_reward_conf_tag": 1.0, "step": 306 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/max_terminated_length": 462.0, "completions/mean_length": 135.419921875, "completions/mean_terminated_length": 135.419921875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.9824, "grad_norm": 0.19477730989456177, "learning_rate": 2.491103202846975e-08, "loss": -0.0008, "num_tokens": 217495214.0, "reward": 1.072265625, "reward_std": 0.06549368053674698, "rewards/accuracy_reward_conf_tag": 0.572265625, "rewards/format_reward_conf_tag": 1.0, "step": 307 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 538.0, "completions/max_terminated_length": 538.0, "completions/mean_length": 152.345703125, "completions/mean_terminated_length": 152.345703125, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.9856, "grad_norm": 0.2829309105873108, "learning_rate": 2.1352313167259786e-08, "loss": 0.003, "num_tokens": 218226655.0, "reward": 0.990234375, "reward_std": 0.15097317099571228, "rewards/accuracy_reward_conf_tag": 0.490234375, "rewards/format_reward_conf_tag": 1.0, "step": 308 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 413.0, "completions/max_terminated_length": 413.0, "completions/mean_length": 138.583984375, "completions/mean_terminated_length": 138.583984375, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.9888, "grad_norm": 0.19521859288215637, "learning_rate": 1.7793594306049822e-08, "loss": -0.0003, "num_tokens": 218934202.0, "reward": 0.982421875, "reward_std": 0.08219750225543976, "rewards/accuracy_reward_conf_tag": 0.482421875, "rewards/format_reward_conf_tag": 1.0, "step": 309 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/max_terminated_length": 418.0, "completions/mean_length": 137.216796875, "completions/mean_terminated_length": 137.216796875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.992, "grad_norm": 0.2574136555194855, "learning_rate": 1.4234875444839856e-08, "loss": 0.0034, "num_tokens": 219650329.0, "reward": 1.0517578125, "reward_std": 0.10580358654260635, "rewards/accuracy_reward_conf_tag": 0.552734375, "rewards/format_reward_conf_tag": 0.998046875, "step": 310 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 134.279296875, "completions/mean_terminated_length": 134.279296875, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.9952, "grad_norm": 0.2528088390827179, "learning_rate": 1.0676156583629893e-08, "loss": 0.0028, "num_tokens": 220359216.0, "reward": 1.02734375, "reward_std": 0.1255941092967987, "rewards/accuracy_reward_conf_tag": 0.52734375, "rewards/format_reward_conf_tag": 1.0, "step": 311 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/max_terminated_length": 239.0, "completions/mean_length": 120.03125, "completions/mean_terminated_length": 120.03125, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.9984, "grad_norm": 0.2555108964443207, "learning_rate": 7.117437722419928e-09, "loss": 0.0029, "num_tokens": 221078914.0, "reward": 1.013671875, "reward_std": 0.1146785318851471, "rewards/accuracy_reward_conf_tag": 0.513671875, "rewards/format_reward_conf_tag": 1.0, "step": 312 }, { "epoch": 0.9984, "step": 312, "total_flos": 0.0, "train_loss": 0.001158178178882689, "train_runtime": 16424.5215, "train_samples_per_second": 1.218, "train_steps_per_second": 0.019 } ], "logging_steps": 1, "max_steps": 313, "num_input_tokens_seen": 221078914, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }