{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.26171875, "completions/max_length": 1504.0, "completions/max_terminated_length": 1504.0, "completions/mean_length": 303.0, "completions/mean_terminated_length": 410.4126892089844, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0016, "grad_norm": 0.005753066390752792, "learning_rate": 1e-07, "loss": -0.0217, "num_tokens": 392512.0, "reward": 0.04026263207197189, "reward_std": 0.09501844644546509, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.018024610355496407, "rewards/confidence_one_or_zero": 0.0625, "rewards/format_reward": 0.0625, "rewards/mean_confidence_reward": 0.26346302032470703, "step": 1 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.30859375, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 341.23828125, "completions/mean_terminated_length": 493.5423889160156, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.006315574515610933, "learning_rate": 2e-07, "loss": -0.0254, "num_tokens": 816933.0, "reward": 0.07134318351745605, "reward_std": 0.16403131186962128, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.029404297471046448, "rewards/confidence_one_or_zero": 0.07421875, "rewards/format_reward": 0.11328125, "rewards/mean_confidence_reward": 0.3325389623641968, "step": 2 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2421875, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 321.2421875, "completions/mean_terminated_length": 423.9071960449219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0048, "grad_norm": 0.005065033212304115, "learning_rate": 3e-07, "loss": -0.0145, "num_tokens": 1229651.0, "reward": 0.047541044652462006, "reward_std": 0.10478618741035461, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.02086273394525051, "rewards/confidence_one_or_zero": 0.06640625, "rewards/format_reward": 0.0703125, "rewards/mean_confidence_reward": 0.23426908254623413, "step": 3 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.27734375, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 366.34765625, "completions/mean_terminated_length": 506.9459533691406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.006735912058502436, "learning_rate": 4e-07, "loss": -0.0488, "num_tokens": 1666212.0, "reward": 0.04722738265991211, "reward_std": 0.10730428993701935, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.020235449075698853, "rewards/confidence_one_or_zero": 0.04296875, "rewards/format_reward": 0.0703125, "rewards/mean_confidence_reward": 0.23549126088619232, "step": 4 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25390625, "completions/max_length": 1522.0, "completions/max_terminated_length": 1522.0, "completions/mean_length": 337.83203125, "completions/mean_terminated_length": 452.8010559082031, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008, "grad_norm": 0.005995773710310459, "learning_rate": 5e-07, "loss": -0.0337, "num_tokens": 2104033.0, "reward": 0.05744408816099167, "reward_std": 0.13524429500102997, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.017231373116374016, "rewards/confidence_one_or_zero": 0.0546875, "rewards/format_reward": 0.09765625, "rewards/mean_confidence_reward": 0.22458529472351074, "step": 5 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.30078125, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 308.78515625, "completions/mean_terminated_length": 441.614501953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.005563064943999052, "learning_rate": 6e-07, "loss": -0.0162, "num_tokens": 2526522.0, "reward": 0.03998662531375885, "reward_std": 0.10862339287996292, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.017472656443715096, "rewards/confidence_one_or_zero": 0.078125, "rewards/format_reward": 0.0625, "rewards/mean_confidence_reward": 0.21755936741828918, "step": 6 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.26953125, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 319.87890625, "completions/mean_terminated_length": 437.9090881347656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0112, "grad_norm": 0.005541341844946146, "learning_rate": 7e-07, "loss": -0.0218, "num_tokens": 2943667.0, "reward": 0.04273135960102081, "reward_std": 0.09829111397266388, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.01905578374862671, "rewards/confidence_one_or_zero": 0.0625, "rewards/format_reward": 0.06640625, "rewards/mean_confidence_reward": 0.2791077792644501, "step": 7 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.28515625, "completions/max_length": 1501.0, "completions/max_terminated_length": 1501.0, "completions/mean_length": 291.1328125, "completions/mean_terminated_length": 407.26776123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.00616063317283988, "learning_rate": 8e-07, "loss": -0.0247, "num_tokens": 3338573.0, "reward": 0.05297835171222687, "reward_std": 0.12713265419006348, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.020018475130200386, "rewards/confidence_one_or_zero": 0.07421875, "rewards/format_reward": 0.0859375, "rewards/mean_confidence_reward": 0.2880028486251831, "step": 8 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.30078125, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 331.2890625, "completions/mean_terminated_length": 473.7988586425781, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0144, "grad_norm": 0.004324339330196381, "learning_rate": 9e-07, "loss": -0.0157, "num_tokens": 3772351.0, "reward": 0.03322942554950714, "reward_std": 0.08382241427898407, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.01567695289850235, "rewards/confidence_one_or_zero": 0.078125, "rewards/format_reward": 0.05078125, "rewards/mean_confidence_reward": 0.24392913281917572, "step": 9 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.27734375, "completions/max_length": 1512.0, "completions/max_terminated_length": 1512.0, "completions/mean_length": 323.51953125, "completions/mean_terminated_length": 447.68109130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.008059307001531124, "learning_rate": 1e-06, "loss": -0.0296, "num_tokens": 4190820.0, "reward": 0.04703688248991966, "reward_std": 0.11515301465988159, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.01594802923500538, "rewards/confidence_one_or_zero": 0.07421875, "rewards/format_reward": 0.078125, "rewards/mean_confidence_reward": 0.29039186239242554, "step": 10 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.30078125, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 283.08203125, "completions/mean_terminated_length": 404.854736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0176, "grad_norm": 0.0061668287962675095, "learning_rate": 1e-06, "loss": -0.0361, "num_tokens": 4610681.0, "reward": 0.0702010989189148, "reward_std": 0.14318351447582245, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.031026437878608704, "rewards/confidence_one_or_zero": 0.0625, "rewards/format_reward": 0.10546875, "rewards/mean_confidence_reward": 0.3190605640411377, "step": 11 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.29296875, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 319.8203125, "completions/mean_terminated_length": 452.3425598144531, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.00830077100545168, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 5030875.0, "reward": 0.06890039145946503, "reward_std": 0.16091418266296387, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.03233125060796738, "rewards/confidence_one_or_zero": 0.0859375, "rewards/format_reward": 0.10546875, "rewards/mean_confidence_reward": 0.30535900592803955, "step": 12 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2890625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 342.921875, "completions/mean_terminated_length": 482.3516540527344, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0208, "grad_norm": 0.006423098035156727, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 5467367.0, "reward": 0.06870196759700775, "reward_std": 0.16580435633659363, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.03193442523479462, "rewards/confidence_one_or_zero": 0.07421875, "rewards/format_reward": 0.10546875, "rewards/mean_confidence_reward": 0.30217790603637695, "step": 13 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2890625, "completions/max_length": 1525.0, "completions/max_terminated_length": 1525.0, "completions/mean_length": 292.16015625, "completions/mean_terminated_length": 410.9505615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0224, "grad_norm": 0.0063196225091814995, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 5889400.0, "reward": 0.06891889870166779, "reward_std": 0.1583699733018875, "rewards/accuracy_reward": 0.0078125, "rewards/brier_reward": 0.032368291169404984, "rewards/confidence_one_or_zero": 0.08203125, "rewards/format_reward": 0.09765625, "rewards/mean_confidence_reward": 0.29947715997695923, "step": 14 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.26953125, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 370.01171875, "completions/mean_terminated_length": 506.5401306152344, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024, "grad_norm": 0.007168356329202652, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 6327707.0, "reward": 0.09601341187953949, "reward_std": 0.19943499565124512, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.03968217968940735, "rewards/confidence_one_or_zero": 0.078125, "rewards/format_reward": 0.15234375, "rewards/mean_confidence_reward": 0.3757632374763489, "step": 15 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.234375, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 409.3203125, "completions/mean_terminated_length": 534.6224365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0256, "grad_norm": 0.007660416420549154, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 6791245.0, "reward": 0.09660420566797256, "reward_std": 0.19251175224781036, "rewards/accuracy_reward": 0.0078125, "rewards/brier_reward": 0.040863730013370514, "rewards/confidence_one_or_zero": 0.09375, "rewards/format_reward": 0.14453125, "rewards/mean_confidence_reward": 0.37647533416748047, "step": 16 }, { "calib/answer_extract_rate": 0.00390625, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.28125, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 287.19140625, "completions/mean_terminated_length": 399.5706481933594, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0272, "grad_norm": 0.009069916792213917, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 7177862.0, "reward": 0.08393032848834991, "reward_std": 0.1837606132030487, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.038953568786382675, "rewards/confidence_one_or_zero": 0.07421875, "rewards/format_reward": 0.125, "rewards/mean_confidence_reward": 0.35352301597595215, "step": 17 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2734375, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 332.26171875, "completions/mean_terminated_length": 457.30645751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0288, "grad_norm": 0.007633299566805363, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 7603041.0, "reward": 0.11554718762636185, "reward_std": 0.23108074069023132, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.047499630600214005, "rewards/confidence_one_or_zero": 0.08984375, "rewards/format_reward": 0.18359375, "rewards/mean_confidence_reward": 0.4107900559902191, "step": 18 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.265625, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 359.8671875, "completions/mean_terminated_length": 490.0318908691406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0304, "grad_norm": 0.009029170498251915, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 8038847.0, "reward": 0.11090146005153656, "reward_std": 0.21172785758972168, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.04211445152759552, "rewards/confidence_one_or_zero": 0.09375, "rewards/format_reward": 0.1796875, "rewards/mean_confidence_reward": 0.3890570402145386, "step": 19 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.29296875, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 312.59375, "completions/mean_terminated_length": 442.1215515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.00870381947606802, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 8462351.0, "reward": 0.13207530975341797, "reward_std": 0.2408745288848877, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.06102453172206879, "rewards/confidence_one_or_zero": 0.09765625, "rewards/format_reward": 0.203125, "rewards/mean_confidence_reward": 0.44412317872047424, "step": 20 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 327.46484375, "completions/mean_terminated_length": 436.61981201171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0336, "grad_norm": 0.008368119597434998, "learning_rate": 1e-06, "loss": -0.0343, "num_tokens": 8889134.0, "reward": 0.15713617205619812, "reward_std": 0.26373666524887085, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.07208360731601715, "rewards/confidence_one_or_zero": 0.1328125, "rewards/format_reward": 0.23828125, "rewards/mean_confidence_reward": 0.47515082359313965, "step": 21 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.234375, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 321.921875, "completions/mean_terminated_length": 420.4693908691406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0352, "grad_norm": 0.009171784855425358, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 9311170.0, "reward": 0.15757590532302856, "reward_std": 0.2669645845890045, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.07296313345432281, "rewards/confidence_one_or_zero": 0.12890625, "rewards/format_reward": 0.2421875, "rewards/mean_confidence_reward": 0.465215802192688, "step": 22 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 338.9375, "completions/mean_terminated_length": 451.91668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0368, "grad_norm": 0.009062502533197403, "learning_rate": 1e-06, "loss": -0.0839, "num_tokens": 9743634.0, "reward": 0.1568487286567688, "reward_std": 0.2625208795070648, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.0558837354183197, "rewards/confidence_one_or_zero": 0.10546875, "rewards/format_reward": 0.25390625, "rewards/mean_confidence_reward": 0.5000218152999878, "step": 23 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17578125, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 404.09765625, "completions/mean_terminated_length": 490.2796325683594, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0384, "grad_norm": 0.010168269276618958, "learning_rate": 1e-06, "loss": -0.0685, "num_tokens": 10181659.0, "reward": 0.2268688827753067, "reward_std": 0.33130136132240295, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.12170526385307312, "rewards/confidence_one_or_zero": 0.1171875, "rewards/format_reward": 0.33203125, "rewards/mean_confidence_reward": 0.5028989911079407, "step": 24 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 333.47265625, "completions/mean_terminated_length": 410.4278869628906, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.04, "grad_norm": 0.009031484834849834, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 10615460.0, "reward": 0.22537299990653992, "reward_std": 0.2971491515636444, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.09527580440044403, "rewards/confidence_one_or_zero": 0.14453125, "rewards/format_reward": 0.3515625, "rewards/mean_confidence_reward": 0.5729029178619385, "step": 25 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 1519.0, "completions/max_terminated_length": 1519.0, "completions/mean_length": 378.61328125, "completions/mean_terminated_length": 446.65899658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0416, "grad_norm": 0.008382085710763931, "learning_rate": 1e-06, "loss": -0.0364, "num_tokens": 11065961.0, "reward": 0.2754327058792114, "reward_std": 0.3278188109397888, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.12898893654346466, "rewards/confidence_one_or_zero": 0.1171875, "rewards/format_reward": 0.421875, "rewards/mean_confidence_reward": 0.5870228409767151, "step": 26 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21484375, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 353.63671875, "completions/mean_terminated_length": 450.4029846191406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0432, "grad_norm": 0.008282415568828583, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 11489188.0, "reward": 0.2620762586593628, "reward_std": 0.3158631920814514, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.11008860170841217, "rewards/confidence_one_or_zero": 0.11328125, "rewards/format_reward": 0.4140625, "rewards/mean_confidence_reward": 0.6116750836372375, "step": 27 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.171875, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 368.2109375, "completions/mean_terminated_length": 444.632080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0448, "grad_norm": 0.010257712565362453, "learning_rate": 1e-06, "loss": -0.0683, "num_tokens": 11918138.0, "reward": 0.30954355001449585, "reward_std": 0.33774739503860474, "rewards/accuracy_reward": 0.0078125, "rewards/brier_reward": 0.13471046090126038, "rewards/confidence_one_or_zero": 0.171875, "rewards/format_reward": 0.4765625, "rewards/mean_confidence_reward": 0.654219388961792, "step": 28 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14453125, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 322.359375, "completions/mean_terminated_length": 376.8218994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0464, "grad_norm": 0.00957749504595995, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 12349814.0, "reward": 0.29460030794143677, "reward_std": 0.32603347301483154, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.13607406616210938, "rewards/confidence_one_or_zero": 0.15234375, "rewards/format_reward": 0.453125, "rewards/mean_confidence_reward": 0.5973583459854126, "step": 29 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 1522.0, "completions/max_terminated_length": 1522.0, "completions/mean_length": 389.828125, "completions/mean_terminated_length": 435.7904052734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.048, "grad_norm": 0.007783412467688322, "learning_rate": 1e-06, "loss": -0.0552, "num_tokens": 12779682.0, "reward": 0.3716452419757843, "reward_std": 0.3282296657562256, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.1612575352191925, "rewards/confidence_one_or_zero": 0.140625, "rewards/format_reward": 0.58203125, "rewards/mean_confidence_reward": 0.7023523449897766, "step": 30 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 348.17578125, "completions/mean_terminated_length": 401.5, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0496, "grad_norm": 0.009112906642258167, "learning_rate": 1e-06, "loss": -0.0457, "num_tokens": 13204127.0, "reward": 0.3648914098739624, "reward_std": 0.3676462769508362, "rewards/accuracy_reward": 0.015625, "rewards/brier_reward": 0.17900002002716064, "rewards/confidence_one_or_zero": 0.12890625, "rewards/format_reward": 0.53515625, "rewards/mean_confidence_reward": 0.6544030904769897, "step": 31 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 291.328125, "completions/mean_terminated_length": 339.0, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0512, "grad_norm": 0.010887404903769493, "learning_rate": 1e-06, "loss": -0.0865, "num_tokens": 13605243.0, "reward": 0.413626492023468, "reward_std": 0.35561421513557434, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.2256888449192047, "rewards/confidence_one_or_zero": 0.14453125, "rewards/format_reward": 0.6015625, "rewards/mean_confidence_reward": 0.6688516139984131, "step": 32 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 350.7734375, "completions/mean_terminated_length": 385.3991394042969, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0528, "grad_norm": 0.008478617295622826, "learning_rate": 1e-06, "loss": -0.0285, "num_tokens": 14036193.0, "reward": 0.4472588896751404, "reward_std": 0.32160085439682007, "rewards/accuracy_reward": 0.0078125, "rewards/brier_reward": 0.21873486042022705, "rewards/confidence_one_or_zero": 0.1484375, "rewards/format_reward": 0.66796875, "rewards/mean_confidence_reward": 0.6860401630401611, "step": 33 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 311.640625, "completions/mean_terminated_length": 340.940185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0544, "grad_norm": 0.009975390508770943, "learning_rate": 1e-06, "loss": -0.0722, "num_tokens": 14457565.0, "reward": 0.483206570148468, "reward_std": 0.3413535952568054, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.25547391176223755, "rewards/confidence_one_or_zero": 0.171875, "rewards/format_reward": 0.70703125, "rewards/mean_confidence_reward": 0.6938413381576538, "step": 34 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 342.3984375, "completions/mean_terminated_length": 363.7095642089844, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.056, "grad_norm": 0.010337012819945812, "learning_rate": 1e-06, "loss": -0.0628, "num_tokens": 14890075.0, "reward": 0.5229313373565674, "reward_std": 0.34047645330429077, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.28414231538772583, "rewards/confidence_one_or_zero": 0.14453125, "rewards/format_reward": 0.7421875, "rewards/mean_confidence_reward": 0.6841410398483276, "step": 35 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 345.375, "completions/mean_terminated_length": 371.4958190917969, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0576, "grad_norm": 0.008786818943917751, "learning_rate": 1e-06, "loss": -0.041, "num_tokens": 15312395.0, "reward": 0.5578696727752686, "reward_std": 0.35204410552978516, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.3149563670158386, "rewards/confidence_one_or_zero": 0.15234375, "rewards/format_reward": 0.77734375, "rewards/mean_confidence_reward": 0.6901431083679199, "step": 36 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 319.609375, "completions/mean_terminated_length": 338.0991516113281, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0592, "grad_norm": 0.009227329865098, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 15736471.0, "reward": 0.5536789894104004, "reward_std": 0.33590322732925415, "rewards/accuracy_reward": 0.00390625, "rewards/brier_reward": 0.33391907811164856, "rewards/confidence_one_or_zero": 0.10546875, "rewards/format_reward": 0.76953125, "rewards/mean_confidence_reward": 0.6370420455932617, "step": 37 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 318.7578125, "completions/mean_terminated_length": 340.00836181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0608, "grad_norm": 0.008682608604431152, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 16163457.0, "reward": 0.5562059879302979, "reward_std": 0.3175504505634308, "rewards/accuracy_reward": 0.01171875, "rewards/brier_reward": 0.307722806930542, "rewards/confidence_one_or_zero": 0.171875, "rewards/format_reward": 0.79296875, "rewards/mean_confidence_reward": 0.6644600629806519, "step": 38 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 292.265625, "completions/mean_terminated_length": 310.4564514160156, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.0624, "grad_norm": 0.010437216609716415, "learning_rate": 1e-06, "loss": -0.0107, "num_tokens": 16574525.0, "reward": 0.5814734697341919, "reward_std": 0.33318889141082764, "rewards/accuracy_reward": 0.015625, "rewards/brier_reward": 0.35825785994529724, "rewards/confidence_one_or_zero": 0.12109375, "rewards/format_reward": 0.7890625, "rewards/mean_confidence_reward": 0.651659369468689, "step": 39 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1528.0, "completions/max_terminated_length": 1528.0, "completions/mean_length": 282.6953125, "completions/mean_terminated_length": 290.6425476074219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.064, "grad_norm": 0.009016141295433044, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 16978535.0, "reward": 0.6290339231491089, "reward_std": 0.29086869955062866, "rewards/accuracy_reward": 0.0078125, "rewards/brier_reward": 0.3947851061820984, "rewards/confidence_one_or_zero": 0.14453125, "rewards/format_reward": 0.85546875, "rewards/mean_confidence_reward": 0.6435203552246094, "step": 40 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 301.30859375, "completions/mean_terminated_length": 308.5400085449219, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0656, "grad_norm": 0.008498923853039742, "learning_rate": 1e-06, "loss": -0.0508, "num_tokens": 17405094.0, "reward": 0.6749566197395325, "reward_std": 0.2964378893375397, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.451474130153656, "rewards/confidence_one_or_zero": 0.1875, "rewards/format_reward": 0.87890625, "rewards/mean_confidence_reward": 0.6043362617492676, "step": 41 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 1510.0, "completions/max_terminated_length": 1510.0, "completions/mean_length": 239.7265625, "completions/mean_terminated_length": 257.8571472167969, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0672, "grad_norm": 0.009880481287837029, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 17799256.0, "reward": 0.6685827970504761, "reward_std": 0.3361000120639801, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.4426327645778656, "rewards/confidence_one_or_zero": 0.15234375, "rewards/format_reward": 0.84765625, "rewards/mean_confidence_reward": 0.6032766699790955, "step": 42 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 289.0703125, "completions/mean_terminated_length": 294.8287048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0688, "grad_norm": 0.0102525120601058, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 18222346.0, "reward": 0.6848199367523193, "reward_std": 0.3227686882019043, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.49073219299316406, "rewards/confidence_one_or_zero": 0.16015625, "rewards/format_reward": 0.859375, "rewards/mean_confidence_reward": 0.5575064420700073, "step": 43 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 1517.0, "completions/max_terminated_length": 1517.0, "completions/mean_length": 265.3046875, "completions/mean_terminated_length": 276.08941650390625, "completions/min_length": 0.0, "completions/min_terminated_length": 30.0, "epoch": 0.0704, "grad_norm": 0.008614201098680496, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 18641896.0, "reward": 0.7085627317428589, "reward_std": 0.2768844962120056, "rewards/accuracy_reward": 0.015625, "rewards/brier_reward": 0.5069676637649536, "rewards/confidence_one_or_zero": 0.17578125, "rewards/format_reward": 0.89453125, "rewards/mean_confidence_reward": 0.5584971308708191, "step": 44 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 221.6640625, "completions/mean_terminated_length": 226.07968139648438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.072, "grad_norm": 0.010863802395761013, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 19036490.0, "reward": 0.7233068943023682, "reward_std": 0.29288819432258606, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.5286435484886169, "rewards/confidence_one_or_zero": 0.19140625, "rewards/format_reward": 0.890625, "rewards/mean_confidence_reward": 0.5390398502349854, "step": 45 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 217.9453125, "completions/mean_terminated_length": 224.07228088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0736, "grad_norm": 0.0098550571128726, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 19410388.0, "reward": 0.7251278162002563, "reward_std": 0.2924645245075226, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.5518166422843933, "rewards/confidence_one_or_zero": 0.2109375, "rewards/format_reward": 0.875, "rewards/mean_confidence_reward": 0.49754637479782104, "step": 46 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 214.70703125, "completions/mean_terminated_length": 217.2529754638672, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0752, "grad_norm": 0.010724488645792007, "learning_rate": 1e-06, "loss": 0.0084, "num_tokens": 19791513.0, "reward": 0.7865052223205566, "reward_std": 0.25504371523857117, "rewards/accuracy_reward": 0.01171875, "rewards/brier_reward": 0.639415442943573, "rewards/confidence_one_or_zero": 0.22265625, "rewards/format_reward": 0.921875, "rewards/mean_confidence_reward": 0.43946534395217896, "step": 47 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 226.703125, "completions/mean_terminated_length": 228.48818969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.0768, "grad_norm": 0.008438576012849808, "learning_rate": 1e-06, "loss": 0.0097, "num_tokens": 20175853.0, "reward": 0.8347652554512024, "reward_std": 0.22036978602409363, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.681247889995575, "rewards/confidence_one_or_zero": 0.2578125, "rewards/format_reward": 0.95703125, "rewards/mean_confidence_reward": 0.4102952182292938, "step": 48 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 223.40625, "completions/mean_terminated_length": 225.1653594970703, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.0784, "grad_norm": 0.009187168441712856, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 20584133.0, "reward": 0.8064360022544861, "reward_std": 0.25242382287979126, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.6714644432067871, "rewards/confidence_one_or_zero": 0.296875, "rewards/format_reward": 0.921875, "rewards/mean_confidence_reward": 0.39295095205307007, "step": 49 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 172.87109375, "completions/mean_terminated_length": 174.23228454589844, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.08, "grad_norm": 0.008574232459068298, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 20968516.0, "reward": 0.8943137526512146, "reward_std": 0.19495204091072083, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.7730011940002441, "rewards/confidence_one_or_zero": 0.375, "rewards/format_reward": 0.96875, "rewards/mean_confidence_reward": 0.30389389395713806, "step": 50 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 168.61328125, "completions/mean_terminated_length": 171.28968811035156, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.0816, "grad_norm": 0.0096592977643013, "learning_rate": 1e-06, "loss": 0.0246, "num_tokens": 21351529.0, "reward": 0.8908511400222778, "reward_std": 0.2035958170890808, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.7856073379516602, "rewards/confidence_one_or_zero": 0.44921875, "rewards/format_reward": 0.953125, "rewards/mean_confidence_reward": 0.24615687131881714, "step": 51 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 151.8984375, "completions/mean_terminated_length": 153.094482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.0832, "grad_norm": 0.008558180183172226, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 21737799.0, "reward": 0.9212564826011658, "reward_std": 0.15918582677841187, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.8503240346908569, "rewards/confidence_one_or_zero": 0.578125, "rewards/format_reward": 0.9609375, "rewards/mean_confidence_reward": 0.18343669176101685, "step": 52 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1047.0, "completions/max_terminated_length": 1047.0, "completions/mean_length": 131.4921875, "completions/mean_terminated_length": 132.0078582763672, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.0848, "grad_norm": 0.008157819509506226, "learning_rate": 1e-06, "loss": 0.0157, "num_tokens": 22110085.0, "reward": 0.9251226186752319, "reward_std": 0.1277318149805069, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.8502437472343445, "rewards/confidence_one_or_zero": 0.58984375, "rewards/format_reward": 0.9765625, "rewards/mean_confidence_reward": 0.18238280713558197, "step": 53 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 129.55078125, "completions/mean_terminated_length": 130.57086181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.0864, "grad_norm": 0.006626965943723917, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 22478362.0, "reward": 0.9469143152236938, "reward_std": 0.11642387509346008, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.897733211517334, "rewards/confidence_one_or_zero": 0.69921875, "rewards/format_reward": 0.9765625, "rewards/mean_confidence_reward": 0.11613567173480988, "step": 54 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 795.0, "completions/max_terminated_length": 795.0, "completions/mean_length": 120.4140625, "completions/mean_terminated_length": 121.3622055053711, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.088, "grad_norm": 0.005671264138072729, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 22861796.0, "reward": 0.96063232421875, "reward_std": 0.09957661479711533, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.9134503602981567, "rewards/confidence_one_or_zero": 0.8203125, "rewards/format_reward": 0.9765625, "rewards/mean_confidence_reward": 0.07218749821186066, "step": 55 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.00390625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 712.0, "completions/max_terminated_length": 712.0, "completions/mean_length": 107.37109375, "completions/mean_terminated_length": 107.37109375, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.0896, "grad_norm": 0.005763660185039043, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 23236467.0, "reward": 0.9770891666412354, "reward_std": 0.06000019609928131, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.9385515451431274, "rewards/confidence_one_or_zero": 0.8984375, "rewards/format_reward": 0.98828125, "rewards/mean_confidence_reward": 0.04484374821186066, "step": 56 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1508.0, "completions/max_terminated_length": 1508.0, "completions/mean_length": 107.41796875, "completions/mean_terminated_length": 107.83922576904297, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.0912, "grad_norm": 0.005554537288844585, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 23593294.0, "reward": 0.974513053894043, "reward_std": 0.07732105255126953, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.9333992004394531, "rewards/confidence_one_or_zero": 0.91015625, "rewards/format_reward": 0.98046875, "rewards/mean_confidence_reward": 0.03757812827825546, "step": 57 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 121.54296875, "completions/mean_terminated_length": 121.54296875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.0928, "grad_norm": 0.005623976234346628, "learning_rate": 1e-06, "loss": 0.0206, "num_tokens": 23965777.0, "reward": 0.9896925091743469, "reward_std": 0.02897283062338829, "rewards/accuracy_reward": 0.08984375, "rewards/brier_reward": 0.8973519802093506, "rewards/confidence_one_or_zero": 0.9453125, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.013710937462747097, "step": 58 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 725.0, "completions/max_terminated_length": 725.0, "completions/mean_length": 101.3359375, "completions/mean_terminated_length": 101.3359375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.0944, "grad_norm": 0.005760283675044775, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 24336647.0, "reward": 0.980745255947113, "reward_std": 0.05597168207168579, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.949769914150238, "rewards/confidence_one_or_zero": 0.96875, "rewards/format_reward": 0.984375, "rewards/mean_confidence_reward": 0.016249999403953552, "step": 59 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 654.0, "completions/max_terminated_length": 654.0, "completions/mean_length": 90.69140625, "completions/mean_terminated_length": 90.69140625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.096, "grad_norm": 0.007192954421043396, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 24693912.0, "reward": 0.9823489785194397, "reward_std": 0.050364814698696136, "rewards/accuracy_reward": 0.078125, "rewards/brier_reward": 0.9021960496902466, "rewards/confidence_one_or_zero": 0.97265625, "rewards/format_reward": 0.984375, "rewards/mean_confidence_reward": 0.009453125298023224, "step": 60 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 791.0, "completions/max_terminated_length": 791.0, "completions/mean_length": 97.859375, "completions/mean_terminated_length": 97.859375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.0976, "grad_norm": 0.0014389591524377465, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 25065868.0, "reward": 0.9979599714279175, "reward_std": 0.005772889591753483, "rewards/accuracy_reward": 0.078125, "rewards/brier_reward": 0.9177929759025574, "rewards/confidence_one_or_zero": 0.984375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.005703125149011612, "step": 61 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1016.0, "completions/max_terminated_length": 1016.0, "completions/mean_length": 92.25, "completions/mean_terminated_length": 92.25, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.0992, "grad_norm": 0.00016888575919438154, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 25424396.0, "reward": 0.9998132586479187, "reward_std": 0.0005308896070346236, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.9371246099472046, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0012109375093132257, "step": 62 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 801.0, "completions/max_terminated_length": 801.0, "completions/mean_length": 87.5859375, "completions/mean_terminated_length": 87.5859375, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1008, "grad_norm": 0.002231016056612134, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 25789338.0, "reward": 0.995339035987854, "reward_std": 0.013185895048081875, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9555199146270752, "rewards/confidence_one_or_zero": 0.9921875, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0033984375186264515, "step": 63 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 76.48046875, "completions/mean_terminated_length": 76.48046875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.1024, "grad_norm": 0.004412069451063871, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 26162277.0, "reward": 0.9921835660934448, "reward_std": 0.02211090549826622, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.9413964748382568, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.00019531250291038305, "step": 64 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 79.14453125, "completions/mean_terminated_length": 79.14453125, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.104, "grad_norm": 1.28965211843024e-06, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 26526962.0, "reward": 0.9999991655349731, "reward_std": 4.981606707588071e-06, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9609339833259583, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.00011718749738065526, "step": 65 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 742.0, "completions/max_terminated_length": 742.0, "completions/mean_length": 89.03515625, "completions/mean_terminated_length": 89.38431549072266, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.1056, "grad_norm": 0.0006708145374432206, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 26899483.0, "reward": 0.9960939288139343, "reward_std": 0.011050763539969921, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.9531234502792358, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 7.812499825377017e-05, "step": 66 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 912.0, "completions/max_terminated_length": 912.0, "completions/mean_length": 90.73046875, "completions/mean_terminated_length": 90.73046875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.1072, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 27273822.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 67 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 531.0, "completions/max_terminated_length": 531.0, "completions/mean_length": 75.40234375, "completions/mean_terminated_length": 75.40234375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.1088, "grad_norm": 0.0036680991761386395, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 27633941.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 68 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 933.0, "completions/max_terminated_length": 933.0, "completions/mean_length": 88.01953125, "completions/mean_terminated_length": 88.01953125, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.1104, "grad_norm": 0.006830692756921053, "learning_rate": 1e-06, "loss": 0.0181, "num_tokens": 27976130.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.09765625, "rewards/brier_reward": 0.89453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 69 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 702.0, "completions/max_terminated_length": 702.0, "completions/mean_length": 79.59375, "completions/mean_terminated_length": 79.59375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.112, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 28339722.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.08984375, "rewards/brier_reward": 0.91015625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 70 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 820.0, "completions/max_terminated_length": 820.0, "completions/mean_length": 72.95703125, "completions/mean_terminated_length": 72.95703125, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.1136, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 28692735.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.078125, "rewards/brier_reward": 0.921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 71 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 580.0, "completions/max_terminated_length": 580.0, "completions/mean_length": 77.0, "completions/mean_terminated_length": 77.0, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.1152, "grad_norm": 0.004060762468725443, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 29041839.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.015625, "rewards/brier_reward": 0.98046875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 72 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 769.0, "completions/max_terminated_length": 769.0, "completions/mean_length": 82.875, "completions/mean_terminated_length": 82.875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1168, "grad_norm": 0.002366835018619895, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 29406799.0, "reward": 0.9960944652557373, "reward_std": 0.01104910671710968, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.9609371423721313, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 3.9062499126885086e-05, "step": 73 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 545.0, "completions/max_terminated_length": 545.0, "completions/mean_length": 73.09765625, "completions/mean_terminated_length": 73.09765625, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 0.1184, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 29751056.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 74 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/max_terminated_length": 463.0, "completions/mean_length": 80.96484375, "completions/mean_terminated_length": 80.96484375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.12, "grad_norm": 0.00017594861856196076, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 30121591.0, "reward": 0.9998788833618164, "reward_std": 0.0003452748933341354, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.972412109375, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0009765625, "step": 75 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/max_terminated_length": 510.0, "completions/mean_length": 70.1875, "completions/mean_terminated_length": 70.1875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1216, "grad_norm": 0.004419372417032719, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 30475583.0, "reward": 0.9960947036743164, "reward_std": 0.011048554442822933, "rewards/accuracy_reward": 0.08203125, "rewards/brier_reward": 0.9140625, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 76 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/max_terminated_length": 454.0, "completions/mean_length": 70.890625, "completions/mean_terminated_length": 70.890625, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "epoch": 0.1232, "grad_norm": 0.0018329236190766096, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 30834539.0, "reward": 0.9960947036743164, "reward_std": 0.011048554442822933, "rewards/accuracy_reward": 0.1015625, "rewards/brier_reward": 0.89453125, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 77 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 72.89453125, "completions/mean_terminated_length": 72.89453125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1248, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 31189880.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 78 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/max_terminated_length": 476.0, "completions/mean_length": 69.28515625, "completions/mean_terminated_length": 69.28515625, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.1264, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 31547609.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9609375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 79 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 546.0, "completions/max_terminated_length": 546.0, "completions/mean_length": 77.7265625, "completions/mean_terminated_length": 77.7265625, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.128, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 31909627.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 80 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/max_terminated_length": 347.0, "completions/mean_length": 69.625, "completions/mean_terminated_length": 69.625, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1296, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 32267299.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 81 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 565.0, "completions/max_terminated_length": 565.0, "completions/mean_length": 66.53125, "completions/mean_terminated_length": 66.53125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1312, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 32627651.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 82 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 654.0, "completions/max_terminated_length": 654.0, "completions/mean_length": 71.55859375, "completions/mean_terminated_length": 71.55859375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1328, "grad_norm": 0.004377719480544329, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 32983826.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 83 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 73.47265625, "completions/mean_terminated_length": 73.47265625, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.1344, "grad_norm": 0.004862726666033268, "learning_rate": 1e-06, "loss": 0.018, "num_tokens": 33352691.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.9296875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 84 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.0, "completions/max_terminated_length": 338.0, "completions/mean_length": 69.0, "completions/mean_terminated_length": 69.0, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.136, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 33705051.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0859375, "rewards/brier_reward": 0.9140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 85 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 636.0, "completions/max_terminated_length": 636.0, "completions/mean_length": 69.3828125, "completions/mean_terminated_length": 69.3828125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1376, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 34053397.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 86 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 596.0, "completions/max_terminated_length": 596.0, "completions/mean_length": 78.84375, "completions/mean_terminated_length": 78.84375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1392, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 34412237.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.08984375, "rewards/brier_reward": 0.91015625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 87 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.0, "completions/max_terminated_length": 415.0, "completions/mean_length": 65.546875, "completions/mean_terminated_length": 65.546875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1408, "grad_norm": 0.007279723882675171, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 34761065.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.93359375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 88 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 752.0, "completions/max_terminated_length": 752.0, "completions/mean_length": 69.90625, "completions/mean_terminated_length": 69.90625, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1424, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 35123865.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 89 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 886.0, "completions/max_terminated_length": 886.0, "completions/mean_length": 71.12890625, "completions/mean_terminated_length": 71.12890625, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.144, "grad_norm": 0.0048187910579144955, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 35476994.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.93359375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 90 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 375.0, "completions/max_terminated_length": 375.0, "completions/mean_length": 65.71484375, "completions/mean_terminated_length": 65.71484375, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.1456, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 35820049.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.09375, "rewards/brier_reward": 0.90625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 91 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 528.0, "completions/max_terminated_length": 528.0, "completions/mean_length": 71.38671875, "completions/mean_terminated_length": 71.38671875, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.1472, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 36176772.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 92 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/max_terminated_length": 395.0, "completions/mean_length": 69.9609375, "completions/mean_terminated_length": 69.9609375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1488, "grad_norm": 0.005049441009759903, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 36533554.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.93359375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 93 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 674.0, "completions/max_terminated_length": 674.0, "completions/mean_length": 66.8515625, "completions/mean_terminated_length": 66.8515625, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1504, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 36884772.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0859375, "rewards/brier_reward": 0.9140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 94 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 693.0, "completions/max_terminated_length": 693.0, "completions/mean_length": 70.703125, "completions/mean_terminated_length": 70.703125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.152, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 37250480.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 95 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 612.0, "completions/max_terminated_length": 612.0, "completions/mean_length": 78.703125, "completions/mean_terminated_length": 78.703125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1536, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 37624020.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 96 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/max_terminated_length": 428.0, "completions/mean_length": 75.44140625, "completions/mean_terminated_length": 75.44140625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1552, "grad_norm": 0.006449607666581869, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 37976149.0, "reward": 0.9921884536743164, "reward_std": 0.02209709770977497, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.9296875, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 97 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 650.0, "completions/max_terminated_length": 650.0, "completions/mean_length": 80.921875, "completions/mean_terminated_length": 80.921875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1568, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 38323969.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 98 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 88.71875, "completions/mean_terminated_length": 88.71875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.1584, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 38693705.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9609375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 99 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 723.0, "completions/max_terminated_length": 723.0, "completions/mean_length": 106.94140625, "completions/mean_terminated_length": 106.94140625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 39079994.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 100 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 583.0, "completions/max_terminated_length": 583.0, "completions/mean_length": 118.9921875, "completions/mean_terminated_length": 118.9921875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1616, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 39460704.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 101 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 685.0, "completions/max_terminated_length": 685.0, "completions/mean_length": 120.30078125, "completions/mean_terminated_length": 120.30078125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1632, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 39830373.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.078125, "rewards/brier_reward": 0.921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 102 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 646.0, "completions/max_terminated_length": 646.0, "completions/mean_length": 141.27734375, "completions/mean_terminated_length": 141.27734375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1648, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 40221460.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 103 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/max_terminated_length": 710.0, "completions/mean_length": 156.98828125, "completions/mean_terminated_length": 156.98828125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1664, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 40598393.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.96875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 104 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1091.0, "completions/max_terminated_length": 1091.0, "completions/mean_length": 171.81640625, "completions/mean_terminated_length": 171.81640625, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.168, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 40992818.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 105 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 818.0, "completions/max_terminated_length": 818.0, "completions/mean_length": 199.01953125, "completions/mean_terminated_length": 199.01953125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1696, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 41400047.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 106 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 819.0, "completions/max_terminated_length": 819.0, "completions/mean_length": 205.16796875, "completions/mean_terminated_length": 205.16796875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.1712, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 41803914.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 107 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 200.7578125, "completions/mean_terminated_length": 200.7578125, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.1728, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 42197876.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 108 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 910.0, "completions/max_terminated_length": 910.0, "completions/mean_length": 207.0859375, "completions/mean_terminated_length": 208.71653747558594, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.1744, "grad_norm": 0.0013323465827852488, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 42583978.0, "reward": 0.9882822036743164, "reward_std": 0.033145640045404434, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.98828125, "rewards/mean_confidence_reward": 0.0, "step": 109 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 887.0, "completions/max_terminated_length": 887.0, "completions/mean_length": 229.625, "completions/mean_terminated_length": 229.625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.176, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 42986818.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 110 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1155.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 238.19140625, "completions/mean_terminated_length": 238.19140625, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.1776, "grad_norm": 0.001869988744147122, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 43389203.0, "reward": 0.9960947036743164, "reward_std": 0.011048554442822933, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 111 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 700.0, "completions/max_terminated_length": 700.0, "completions/mean_length": 220.421875, "completions/mean_terminated_length": 220.421875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.1792, "grad_norm": 0.0012250031577423215, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 43793759.0, "reward": 0.9960165619850159, "reward_std": 0.01126952189952135, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.9764062166213989, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0007812500116415322, "step": 112 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 750.0, "completions/max_terminated_length": 750.0, "completions/mean_length": 242.1171875, "completions/mean_terminated_length": 242.1171875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.1808, "grad_norm": 0.0009556888253428042, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 44178789.0, "reward": 0.9980478286743164, "reward_std": 0.005524259991943836, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.96875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.00390625, "step": 113 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 216.3515625, "completions/mean_terminated_length": 216.3515625, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.1824, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 44567215.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9609375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 114 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 833.0, "completions/max_terminated_length": 833.0, "completions/mean_length": 206.95703125, "completions/mean_terminated_length": 207.76864624023438, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.184, "grad_norm": 0.0012391918571665883, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 44962508.0, "reward": 0.9921884536743164, "reward_std": 0.022097062319517136, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0078125, "step": 115 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 760.0, "completions/max_terminated_length": 760.0, "completions/mean_length": 225.88671875, "completions/mean_terminated_length": 226.77256774902344, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.1856, "grad_norm": 0.0006183154764585197, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 45362343.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 116 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 224.234375, "completions/mean_terminated_length": 224.234375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1872, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 45749667.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 117 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 853.0, "completions/max_terminated_length": 853.0, "completions/mean_length": 214.21875, "completions/mean_terminated_length": 214.21875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1888, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 46136491.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9609375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 118 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 808.0, "completions/max_terminated_length": 808.0, "completions/mean_length": 209.6796875, "completions/mean_terminated_length": 209.6796875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.1904, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 46527353.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.9765625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 119 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 202.578125, "completions/mean_terminated_length": 204.17323303222656, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.192, "grad_norm": 0.001083175651729107, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 46903165.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 120 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 634.0, "completions/max_terminated_length": 634.0, "completions/mean_length": 177.58203125, "completions/mean_terminated_length": 177.58203125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.1936, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 47267402.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0859375, "rewards/brier_reward": 0.9140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 121 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 844.0, "completions/max_terminated_length": 844.0, "completions/mean_length": 209.1328125, "completions/mean_terminated_length": 209.1328125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.1952, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 47663876.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 122 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 709.0, "completions/max_terminated_length": 709.0, "completions/mean_length": 227.12109375, "completions/mean_terminated_length": 228.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.1968, "grad_norm": 0.0005709947436116636, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 48065971.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.08984375, "rewards/brier_reward": 0.90625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 123 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/max_terminated_length": 698.0, "completions/mean_length": 197.13671875, "completions/mean_terminated_length": 197.13671875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.1984, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 48455606.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.9375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 124 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 601.0, "completions/max_terminated_length": 601.0, "completions/mean_length": 173.9921875, "completions/mean_terminated_length": 173.9921875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2, "grad_norm": 0.0014897036598995328, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 48846340.0, "reward": 0.9980478286743164, "reward_std": 0.005524259991943836, "rewards/accuracy_reward": 0.01171875, "rewards/brier_reward": 0.984375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.00390625, "step": 125 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 923.0, "completions/max_terminated_length": 923.0, "completions/mean_length": 186.07421875, "completions/mean_terminated_length": 186.07421875, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.2016, "grad_norm": 0.0018973132828250527, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 49242423.0, "reward": 0.9941415786743164, "reward_std": 0.016572803258895874, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.96875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.00390625, "step": 126 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 184.8125, "completions/mean_terminated_length": 185.53726196289062, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.2032, "grad_norm": 0.0006578292814083397, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 49638551.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 127 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 778.0, "completions/max_terminated_length": 778.0, "completions/mean_length": 153.65625, "completions/mean_terminated_length": 153.65625, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.2048, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 50025647.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 128 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 173.5390625, "completions/mean_terminated_length": 173.5390625, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.2064, "grad_norm": 0.0015325110871344805, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 50417145.0, "reward": 0.9980478286743164, "reward_std": 0.005524259991943836, "rewards/accuracy_reward": 0.07421875, "rewards/brier_reward": 0.921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.00390625, "step": 129 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 656.0, "completions/max_terminated_length": 656.0, "completions/mean_length": 159.5390625, "completions/mean_terminated_length": 159.5390625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.208, "grad_norm": 0.0027299756184220314, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 50782203.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 130 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 897.0, "completions/max_terminated_length": 897.0, "completions/mean_length": 174.40234375, "completions/mean_terminated_length": 174.40234375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2096, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 51173466.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.07421875, "rewards/brier_reward": 0.92578125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 131 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 807.0, "completions/max_terminated_length": 807.0, "completions/mean_length": 203.46875, "completions/mean_terminated_length": 203.46875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.2112, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 51570122.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.06640625, "rewards/brier_reward": 0.93359375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 132 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 782.0, "completions/max_terminated_length": 782.0, "completions/mean_length": 201.28515625, "completions/mean_terminated_length": 201.28515625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.2128, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 51958379.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 133 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 648.0, "completions/max_terminated_length": 648.0, "completions/mean_length": 191.67578125, "completions/mean_terminated_length": 191.67578125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2144, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 52341992.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.96875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 134 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 212.33203125, "completions/mean_terminated_length": 213.1647186279297, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.216, "grad_norm": 0.0007441657362505794, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 52729437.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 135 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 202.16015625, "completions/mean_terminated_length": 202.16015625, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.2176, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 53138966.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.96875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 136 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 669.0, "completions/max_terminated_length": 669.0, "completions/mean_length": 204.73046875, "completions/mean_terminated_length": 204.73046875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.2192, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 53537097.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.09375, "rewards/brier_reward": 0.90625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 137 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 882.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 211.83984375, "completions/mean_terminated_length": 211.83984375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.2208, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 53940168.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 138 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 709.0, "completions/max_terminated_length": 709.0, "completions/mean_length": 217.58203125, "completions/mean_terminated_length": 218.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.2224, "grad_norm": 0.0006282851682044566, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 54337637.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.9375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 139 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 817.0, "completions/max_terminated_length": 817.0, "completions/mean_length": 235.42578125, "completions/mean_terminated_length": 235.42578125, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.224, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 54745834.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 140 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 214.875, "completions/mean_terminated_length": 214.875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.2256, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 55149106.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 141 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 894.0, "completions/max_terminated_length": 894.0, "completions/mean_length": 237.59765625, "completions/mean_terminated_length": 237.59765625, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.2272, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 55568331.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 142 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 786.0, "completions/max_terminated_length": 786.0, "completions/mean_length": 210.86328125, "completions/mean_terminated_length": 210.86328125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2288, "grad_norm": 0.0012927583884447813, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 55980312.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.08984375, "rewards/brier_reward": 0.90625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 143 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1181.0, "completions/max_terminated_length": 1181.0, "completions/mean_length": 213.9765625, "completions/mean_terminated_length": 213.9765625, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.2304, "grad_norm": 0.001867619575932622, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 56373322.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 144 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 933.0, "completions/max_terminated_length": 933.0, "completions/mean_length": 239.46484375, "completions/mean_terminated_length": 240.4039306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.232, "grad_norm": 0.0005441193352453411, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 56791865.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.015625, "rewards/brier_reward": 0.98046875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 145 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 890.0, "completions/max_terminated_length": 890.0, "completions/mean_length": 220.12890625, "completions/mean_terminated_length": 220.12890625, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.2336, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 57197338.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 146 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 683.0, "completions/max_terminated_length": 683.0, "completions/mean_length": 207.7890625, "completions/mean_terminated_length": 208.6039276123047, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.2352, "grad_norm": 0.0005851782043464482, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 57585476.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0703125, "rewards/brier_reward": 0.92578125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 147 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 715.0, "completions/max_terminated_length": 715.0, "completions/mean_length": 206.08203125, "completions/mean_terminated_length": 206.08203125, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.2368, "grad_norm": 5.514766598935239e-05, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 57981521.0, "reward": 0.9999228119850159, "reward_std": 0.00022097892360761762, "rewards/accuracy_reward": 0.0390625, "rewards/brier_reward": 0.9607812166213989, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0007812500116415322, "step": 148 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 628.0, "completions/max_terminated_length": 628.0, "completions/mean_length": 215.3046875, "completions/mean_terminated_length": 215.3046875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.2384, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 58385359.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.01171875, "rewards/brier_reward": 0.98828125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 149 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 800.0, "completions/max_terminated_length": 800.0, "completions/mean_length": 211.234375, "completions/mean_terminated_length": 212.06275939941406, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.24, "grad_norm": 0.0006218485650606453, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 58765427.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 150 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 230.0390625, "completions/mean_terminated_length": 230.0390625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2416, "grad_norm": 0.0008855098858475685, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 59164717.0, "reward": 0.9952874779701233, "reward_std": 0.01333173643797636, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.9593230485916138, "rewards/confidence_one_or_zero": 0.9921875, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0030859375838190317, "step": 151 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 918.0, "completions/max_terminated_length": 918.0, "completions/mean_length": 229.26171875, "completions/mean_terminated_length": 229.26171875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.2432, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 59550792.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 152 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 882.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 208.0078125, "completions/mean_terminated_length": 208.0078125, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.2448, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 59946482.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 153 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1041.0, "completions/max_terminated_length": 1041.0, "completions/mean_length": 230.6875, "completions/mean_terminated_length": 230.6875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.2464, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 60348042.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 154 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 711.0, "completions/max_terminated_length": 711.0, "completions/mean_length": 223.625, "completions/mean_terminated_length": 223.625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.248, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 60753058.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.015625, "rewards/brier_reward": 0.984375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 155 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 221.984375, "completions/mean_terminated_length": 222.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.2496, "grad_norm": 0.0004327092319726944, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 61151358.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.9609375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 156 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 822.0, "completions/max_terminated_length": 822.0, "completions/mean_length": 215.8046875, "completions/mean_terminated_length": 216.65098571777344, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.2512, "grad_norm": 0.0005879381787963212, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 61542484.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.09375, "rewards/brier_reward": 0.90234375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 157 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1006.0, "completions/max_terminated_length": 1006.0, "completions/mean_length": 242.6328125, "completions/mean_terminated_length": 242.6328125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.2528, "grad_norm": 0.0002549047057982534, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 61962934.0, "reward": 0.9990164041519165, "reward_std": 0.002784787444397807, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.9706870913505554, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0027734374161809683, "step": 158 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 768.0, "completions/max_terminated_length": 768.0, "completions/mean_length": 209.625, "completions/mean_terminated_length": 209.625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2544, "grad_norm": 0.0014845837140455842, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 62359782.0, "reward": 0.995782196521759, "reward_std": 0.01193243358284235, "rewards/accuracy_reward": 0.0078125, "rewards/brier_reward": 0.9876562356948853, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0015625000232830644, "step": 159 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 670.0, "completions/max_terminated_length": 670.0, "completions/mean_length": 211.0703125, "completions/mean_terminated_length": 211.89805603027344, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.256, "grad_norm": 0.002763985889032483, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 62747432.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.06640625, "rewards/brier_reward": 0.92578125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 160 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 880.0, "completions/max_terminated_length": 880.0, "completions/mean_length": 203.00390625, "completions/mean_terminated_length": 203.00390625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2576, "grad_norm": 0.002416276140138507, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 63138033.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 161 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 774.0, "completions/max_terminated_length": 774.0, "completions/mean_length": 195.89453125, "completions/mean_terminated_length": 195.89453125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2592, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 63518966.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 162 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 883.0, "completions/max_terminated_length": 883.0, "completions/mean_length": 195.33984375, "completions/mean_terminated_length": 196.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.2608, "grad_norm": 0.00068190653109923, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 63907333.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.93359375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 163 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 611.0, "completions/max_terminated_length": 611.0, "completions/mean_length": 192.30859375, "completions/mean_terminated_length": 192.30859375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.2624, "grad_norm": 0.0013250088086351752, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 64304188.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 164 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 744.0, "completions/max_terminated_length": 744.0, "completions/mean_length": 173.0234375, "completions/mean_terminated_length": 173.0234375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.264, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 64678394.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.08203125, "rewards/brier_reward": 0.91796875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 165 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 614.0, "completions/max_terminated_length": 614.0, "completions/mean_length": 177.8984375, "completions/mean_terminated_length": 177.8984375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.2656, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 65065976.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 166 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 868.0, "completions/max_terminated_length": 868.0, "completions/mean_length": 201.9609375, "completions/mean_terminated_length": 202.75296020507812, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.2672, "grad_norm": 0.000607511552516371, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 65473902.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.9765625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 167 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 896.0, "completions/max_terminated_length": 896.0, "completions/mean_length": 194.75, "completions/mean_terminated_length": 195.51373291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 36.0, "epoch": 0.2688, "grad_norm": 0.0029569705948233604, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 65863102.0, "reward": 0.9921884536743164, "reward_std": 0.022097086533904076, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.0, "step": 168 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 733.0, "completions/max_terminated_length": 733.0, "completions/mean_length": 204.63671875, "completions/mean_terminated_length": 204.63671875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2704, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 66262393.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 169 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/max_terminated_length": 639.0, "completions/mean_length": 194.66015625, "completions/mean_terminated_length": 194.66015625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.272, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 66633106.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 170 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 717.0, "completions/max_terminated_length": 717.0, "completions/mean_length": 204.015625, "completions/mean_terminated_length": 204.015625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2736, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 67027142.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 171 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 756.0, "completions/max_terminated_length": 756.0, "completions/mean_length": 196.80859375, "completions/mean_terminated_length": 196.80859375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.2752, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 67429453.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 172 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 839.0, "completions/max_terminated_length": 839.0, "completions/mean_length": 194.81640625, "completions/mean_terminated_length": 194.81640625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2768, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 67821702.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 173 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 654.0, "completions/max_terminated_length": 654.0, "completions/mean_length": 209.265625, "completions/mean_terminated_length": 209.265625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.2784, "grad_norm": 0.000906936707906425, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 68223738.0, "reward": 0.9980478286743164, "reward_std": 0.005524259991943836, "rewards/accuracy_reward": 0.01171875, "rewards/brier_reward": 0.984375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.00390625, "step": 174 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 624.0, "completions/max_terminated_length": 624.0, "completions/mean_length": 186.72265625, "completions/mean_terminated_length": 186.72265625, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.28, "grad_norm": 0.0019213669002056122, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 68601755.0, "reward": 0.9943320155143738, "reward_std": 0.0160341989248991, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.9495996236801147, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0037109374534338713, "step": 175 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 824.0, "completions/max_terminated_length": 824.0, "completions/mean_length": 198.609375, "completions/mean_terminated_length": 198.609375, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.2816, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 68981303.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 176 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 711.0, "completions/max_terminated_length": 711.0, "completions/mean_length": 184.6328125, "completions/mean_terminated_length": 184.6328125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.2832, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 69372873.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05859375, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 177 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/max_terminated_length": 710.0, "completions/mean_length": 192.1171875, "completions/mean_terminated_length": 192.1171875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.2848, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 69755615.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.9765625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 178 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.0, "completions/max_terminated_length": 590.0, "completions/mean_length": 202.31640625, "completions/mean_terminated_length": 202.31640625, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.2864, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 70147784.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 179 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 841.0, "completions/max_terminated_length": 841.0, "completions/mean_length": 183.59375, "completions/mean_terminated_length": 183.59375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.288, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 70521872.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 180 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 626.0, "completions/max_terminated_length": 626.0, "completions/mean_length": 180.97265625, "completions/mean_terminated_length": 180.97265625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.2896, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 70908449.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.02734375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 181 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 769.0, "completions/max_terminated_length": 769.0, "completions/mean_length": 211.77734375, "completions/mean_terminated_length": 211.77734375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.2912, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 71296752.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.01953125, "rewards/brier_reward": 0.98046875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 182 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 819.0, "completions/max_terminated_length": 819.0, "completions/mean_length": 183.44921875, "completions/mean_terminated_length": 183.44921875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.2928, "grad_norm": 7.136356725823134e-05, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 71673451.0, "reward": 0.9998689293861389, "reward_std": 0.0003734485653694719, "rewards/accuracy_reward": 0.08203125, "rewards/brier_reward": 0.917704701423645, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.001015624962747097, "step": 183 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 695.0, "completions/max_terminated_length": 695.0, "completions/mean_length": 189.8828125, "completions/mean_terminated_length": 189.8828125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.2944, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 72061885.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.94921875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 184 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 969.0, "completions/max_terminated_length": 969.0, "completions/mean_length": 187.76171875, "completions/mean_terminated_length": 187.76171875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.296, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 72450952.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 185 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 887.0, "completions/max_terminated_length": 887.0, "completions/mean_length": 207.46875, "completions/mean_terminated_length": 208.28236389160156, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.2976, "grad_norm": 0.0005568548804149032, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 72836712.0, "reward": 0.9958820343017578, "reward_std": 0.011650143191218376, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.964418351650238, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0012890625512227416, "step": 186 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 792.0, "completions/max_terminated_length": 792.0, "completions/mean_length": 194.98046875, "completions/mean_terminated_length": 194.98046875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.2992, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 73223731.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 187 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 633.0, "completions/max_terminated_length": 633.0, "completions/mean_length": 193.58984375, "completions/mean_terminated_length": 193.58984375, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.3008, "grad_norm": 0.0024952238891273737, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 73628866.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.97265625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 188 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 852.0, "completions/max_terminated_length": 852.0, "completions/mean_length": 190.9921875, "completions/mean_terminated_length": 190.9921875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.3024, "grad_norm": 0.00012831162894144654, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 74010240.0, "reward": 0.9997882843017578, "reward_std": 0.0006015999824739993, "rewards/accuracy_reward": 0.0625, "rewards/brier_reward": 0.937074601650238, "rewards/confidence_one_or_zero": 0.99609375, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0012890625512227416, "step": 189 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 561.0, "completions/max_terminated_length": 561.0, "completions/mean_length": 170.89453125, "completions/mean_terminated_length": 171.56471252441406, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.304, "grad_norm": 0.0007622085977345705, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 74393053.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.0546875, "rewards/brier_reward": 0.94140625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 190 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 716.0, "completions/max_terminated_length": 716.0, "completions/mean_length": 196.12890625, "completions/mean_terminated_length": 196.12890625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.3056, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 74787998.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 191 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 186.24609375, "completions/mean_terminated_length": 186.24609375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.3072, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 75184333.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03125, "rewards/brier_reward": 0.96875, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 192 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 907.0, "completions/max_terminated_length": 907.0, "completions/mean_length": 190.98046875, "completions/mean_terminated_length": 190.98046875, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.3088, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 75581816.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 193 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 180.5234375, "completions/mean_terminated_length": 180.5234375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.3104, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 75974278.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.96484375, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 194 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 188.93359375, "completions/mean_terminated_length": 189.6745147705078, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.312, "grad_norm": 0.0033796713687479496, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 76363445.0, "reward": 0.9882822036743164, "reward_std": 0.03314562886953354, "rewards/accuracy_reward": 0.03515625, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.98828125, "rewards/mean_confidence_reward": 0.0, "step": 195 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 806.0, "completions/max_terminated_length": 806.0, "completions/mean_length": 201.71484375, "completions/mean_terminated_length": 201.71484375, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.3136, "grad_norm": 0.001726991031318903, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 76759284.0, "reward": 0.9960947036743164, "reward_std": 0.011048543266952038, "rewards/accuracy_reward": 0.05078125, "rewards/brier_reward": 0.9453125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.0, "step": 196 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 621.0, "completions/max_terminated_length": 621.0, "completions/mean_length": 194.421875, "completions/mean_terminated_length": 194.421875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.3152, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 77152680.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.04296875, "rewards/brier_reward": 0.95703125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 197 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/max_terminated_length": 593.0, "completions/mean_length": 176.81640625, "completions/mean_terminated_length": 176.81640625, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.3168, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 77542057.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.046875, "rewards/brier_reward": 0.953125, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 198 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 836.0, "completions/max_terminated_length": 836.0, "completions/mean_length": 197.02734375, "completions/mean_terminated_length": 197.02734375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.3184, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 77937704.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.0234375, "rewards/brier_reward": 0.9765625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 199 }, { "calib/answer_extract_rate": 0.0, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 592.0, "completions/max_terminated_length": 592.0, "completions/mean_length": 169.27734375, "completions/mean_terminated_length": 169.27734375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 78296319.0, "reward": 1.0000009536743164, "reward_std": 0.0, "rewards/accuracy_reward": 0.09375, "rewards/brier_reward": 0.90625, "rewards/confidence_one_or_zero": 1.0, "rewards/format_reward": 1.0, "rewards/mean_confidence_reward": 0.0, "step": 200 }, { "epoch": 0.32, "step": 200, "total_flos": 0.0, "train_loss": -0.007609076579804963, "train_runtime": 8450.9766, "train_samples_per_second": 6.058, "train_steps_per_second": 0.024 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 78296319, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }