{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.1, "aux_brier/loss": -9.102586197069666e-06, "aux_brier/mean_r": 0.4665906016643231, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_step_records": 6.153846153846154, "aux_brier/std_r": 0.17818317848902482, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.5645009279251099, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 1.2052168732018822e-07, "aux_brier/mean_r": 0.43024390700616333, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_step_records": 7.105263157894737, "aux_brier/std_r": 0.16724381615456782, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.14317761361598969, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": 6.794580712045993e-06, "aux_brier/mean_r": 0.45482745360244403, "aux_brier/n_active_tok": 24.363636363636363, "aux_brier/n_step_records": 6.090909090909091, "aux_brier/std_r": 0.16174368424849075, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.5714285714285714, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.4153846153846156, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.9230769230769231, "calib/gap": -0.002142857142857224, "calib/mean_conf": 0.953846153846154, "calib/mu_c": 0.9528571428571427, "calib/mu_w": 0.955, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.4153846153846156, "calib/std_conf": 0.024349525832837542, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 624.51953125, "completions/mean_terminated_length": 695.1173706054688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.019731556996703148, "learning_rate": 7.5e-07, "loss": 0.033, "num_tokens": 798600.0, "reward": 0.058033399283885956, "reward_std": 0.11475954204797745, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.029008593410253525, "rewards/format_reward_step": 0.046875, "step": 3 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": 3.005054905003135e-05, "aux_brier/mean_r": 0.46613133805138723, "aux_brier/n_active_tok": 25.714285714285715, "aux_brier/n_step_records": 6.428571428571429, "aux_brier/std_r": 0.15916945040225983, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.28125, "calib/avg_num_step_conf": 0.17578125, "calib/ece": 0.6539999999999999, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": -0.036250000000000004, "calib/mean_conf": 0.8539999999999999, "calib/mu_c": 0.825, "calib/mu_w": 0.86125, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.6539999999999999, "calib/std_conf": 0.1585685971433184, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 671.32421875, "completions/mean_terminated_length": 716.0792236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.35197219252586365, "learning_rate": 1.0000000000000002e-06, "loss": -0.0052, "num_tokens": 1076627.0, "reward": 0.024561328813433647, "reward_std": 0.05980735272169113, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.012307812459766865, "rewards/format_reward_step": 0.02734375, "step": 4 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": -1.6236012197134997e-05, "aux_brier/mean_r": 0.430947482585907, "aux_brier/n_active_tok": 24.0, "aux_brier/n_step_records": 6.0, "aux_brier/std_r": 0.09453002782538533, "calib/answer_extract_rate": 0.04296875, "calib/avg_num_step_conf": 0.19140625, "calib/ece": 0.7899999999999999, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/mean_conf": 0.79, "calib/mu_c": NaN, "calib/mu_w": 0.79, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.7899999999999999, "calib/std_conf": 0.3361547262794322, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 734.9765625, "completions/mean_terminated_length": 832.539794921875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.005333333333333333, "grad_norm": 0.005658743903040886, "learning_rate": 1.25e-06, "loss": 0.0077, "num_tokens": 1371469.0, "reward": 0.00603193324059248, "reward_std": 0.013095740228891373, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0006902343593537807, "rewards/format_reward_step": 0.01171875, "step": 5 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": 9.501435876657638e-06, "aux_brier/mean_r": 0.498914211243391, "aux_brier/n_active_tok": 31.0, "aux_brier/n_step_records": 7.75, "aux_brier/std_r": 0.17871646173298358, "calib/answer_extract_rate": 0.1328125, "calib/auroc": 0.25, "calib/avg_num_step_conf": 0.640625, "calib/ece": 0.6983600000000001, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.76, "calib/gap": -0.044333333333333336, "calib/mean_conf": 0.9103600000000001, "calib/mu_c": 0.8766666666666666, "calib/mu_w": 0.9209999999999999, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.6843600000000001, "calib/std_conf": 0.17328609407566434, "calib/step_conf_rate": 0.1171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 602.7265625, "completions/mean_terminated_length": 665.0775756835938, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0064, "grad_norm": 0.1781889647245407, "learning_rate": 1.5e-06, "loss": 0.0369, "num_tokens": 1631719.0, "reward": 0.07612704485654831, "reward_std": 0.14089490473270416, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.031070686876773834, "rewards/format_reward_step": 0.08984375, "step": 6 }, { "aux_brier/lambda": 0.1, "aux_brier/loss": 2.105866712052664e-06, "aux_brier/mean_r": 0.49490843827907854, "aux_brier/n_active_tok": 29.846153846153847, "aux_brier/n_step_records": 7.461538461538462, "aux_brier/std_r": 0.17764290766074106, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.6384615384615384, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.6325000000000002, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.057769230769230684, "calib/mean_conf": 0.9102777777777779, "calib/mu_c": 0.952, "calib/mu_w": 0.8942307692307693, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.6325000000000002, "calib/std_conf": 0.17748608883063213, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 693.28515625, "completions/mean_terminated_length": 778.4254150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.14674966037273407, "learning_rate": 1.75e-06, "loss": 0.0137, "num_tokens": 1916624.0, "reward": 0.054463449865579605, "reward_std": 0.13023746013641357, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.022541308775544167, "rewards/format_reward_step": 0.05859375, "step": 7 }, { "aux_brier/lambda": 0.1, "aux_brier/loss": -1.7897489714194643e-05, "aux_brier/mean_r": 0.4835428978715624, "aux_brier/n_active_tok": 20.285714285714285, "aux_brier/n_step_records": 5.071428571428571, "aux_brier/std_r": 0.153486252363239, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.5625, "calib/avg_num_step_conf": 0.28125, "calib/ece": 0.6809375, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.04874999999999985, "calib/mean_conf": 0.9309375, "calib/mu_c": 0.9675, "calib/mu_w": 0.9187500000000002, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.6809375, "calib/std_conf": 0.10568554108178659, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2990.0, "completions/max_terminated_length": 2990.0, "completions/mean_length": 567.046875, "completions/mean_terminated_length": 615.1016845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.7247595191001892, "learning_rate": 2.0000000000000003e-06, "loss": 0.0239, "num_tokens": 2168300.0, "reward": 0.04034404456615448, "reward_std": 0.10075663030147552, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.020751170814037323, "rewards/format_reward_step": 0.0390625, "step": 8 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": -1.0477180734103086e-05, "aux_brier/mean_r": 0.5385760571807623, "aux_brier/n_active_tok": 20.5, "aux_brier/n_step_records": 5.125, "aux_brier/std_r": 0.1042425143532455, "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.4285714285714286, "calib/avg_num_step_conf": 0.16796875, "calib/ece": 0.84875, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.004285714285714337, "calib/mean_conf": 0.97375, "calib/mu_c": 0.97, "calib/mu_w": 0.9742857142857143, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.84875, "calib/std_conf": 0.011110243021644496, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 643.45703125, "completions/mean_terminated_length": 722.4780883789062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.006357858423143625, "learning_rate": 2.25e-06, "loss": 0.0304, "num_tokens": 2440561.0, "reward": 0.01882334053516388, "reward_std": 0.041674330830574036, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.004980859346687794, "rewards/format_reward_step": 0.02734375, "step": 9 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": 1.272371829008634e-07, "aux_brier/mean_r": 0.47323180735111237, "aux_brier/n_active_tok": 37.25, "aux_brier/n_step_records": 9.3125, "aux_brier/std_r": 0.14950991631485522, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.46875, "calib/avg_num_step_conf": 0.59375, "calib/ece": 0.8361111111111111, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": -0.17812499999999998, "calib/mean_conf": 0.8883333333333333, "calib/mu_c": 0.73, "calib/mu_w": 0.908125, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.8066666666666666, "calib/std_conf": 0.20097678141185035, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 701.59765625, "completions/mean_terminated_length": 767.5598754882812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.1916794329881668, "learning_rate": 2.5e-06, "loss": 0.0066, "num_tokens": 2726970.0, "reward": 0.03645956888794899, "reward_std": 0.08734627068042755, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.013025781139731407, "rewards/format_reward_step": 0.05078125, "step": 10 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -1.1409345462924586e-05, "aux_brier/mean_r": 0.487497550876517, "aux_brier/n_active_tok": 32.0, "aux_brier/n_step_records": 8.0, "aux_brier/std_r": 0.1807484465994333, "calib/answer_extract_rate": 0.1484375, "calib/auroc": 0.5874999999999999, "calib/avg_num_step_conf": 0.61328125, "calib/ece": 0.6446249999999999, "calib/final_conf_rate": 0.109375, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.8214285714285714, "calib/gap": 0.006524999999999892, "calib/mean_conf": 0.9303392857142859, "calib/mu_c": 0.9349999999999999, "calib/mu_w": 0.928475, "calib/nonempty_final_conf_rate": 0.109375, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.12890625, "calib/pce": 0.6446249999999999, "calib/std_conf": 0.07888215142714802, "calib/step_conf_rate": 0.12890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 652.19140625, "completions/mean_terminated_length": 692.7842407226562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.3505798280239105, "learning_rate": 2.7500000000000004e-06, "loss": 0.0425, "num_tokens": 2998411.0, "reward": 0.08793935179710388, "reward_std": 0.13506345450878143, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.03925742208957672, "rewards/format_reward_step": 0.09375, "step": 11 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -9.934263243215963e-06, "aux_brier/mean_r": 0.5341246542723282, "aux_brier/n_active_tok": 22.956521739130434, "aux_brier/n_step_records": 5.739130434782608, "aux_brier/std_r": 0.17240982081579126, "calib/answer_extract_rate": 0.125, "calib/auroc": 0.5130718954248366, "calib/avg_num_step_conf": 0.515625, "calib/ece": 0.5178461538461537, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.7307692307692307, "calib/gap": 0.1213333333333333, "calib/mean_conf": 0.864, "calib/mu_c": 0.9433333333333334, "calib/mu_w": 0.8220000000000001, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.5178461538461537, "calib/std_conf": 0.2585105264930082, "calib/step_conf_rate": 0.12109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 575.91015625, "completions/mean_terminated_length": 627.3744506835938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.41277876496315, "learning_rate": 3e-06, "loss": 0.0412, "num_tokens": 3250020.0, "reward": 0.0893206000328064, "reward_std": 0.16197365522384644, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.04478242248296738, "rewards/format_reward_step": 0.0859375, "step": 12 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -4.006175787067563e-08, "aux_brier/mean_r": 0.47388083013621246, "aux_brier/n_active_tok": 33.09090909090909, "aux_brier/n_step_records": 8.272727272727273, "aux_brier/std_r": 0.1905684159560637, "calib/answer_extract_rate": 0.16015625, "calib/auroc": 0.7373737373737373, "calib/avg_num_step_conf": 0.734375, "calib/ece": 0.8316666666666666, "calib/final_conf_rate": 0.140625, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.06000000000000005, "calib/mean_conf": 0.9149999999999999, "calib/mu_c": 0.9700000000000001, "calib/mu_w": 0.91, "calib/nonempty_final_conf_rate": 0.140625, "calib/nonempty_reasoning_rate": 0.19140625, "calib/nonempty_step_conf_rate": 0.15234375, "calib/pce": 0.8316666666666666, "calib/std_conf": 0.11847409468365272, "calib/step_conf_rate": 0.15234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 668.41015625, "completions/mean_terminated_length": 704.168701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.12139452993869781, "learning_rate": 3.2500000000000002e-06, "loss": 0.0534, "num_tokens": 3525725.0, "reward": 0.07909873127937317, "reward_std": 0.15631961822509766, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.027332423254847527, "rewards/format_reward_step": 0.11328125, "step": 13 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 2.7218451820911954e-08, "aux_brier/mean_r": 0.440531387925148, "aux_brier/n_active_tok": 38.08695652173913, "aux_brier/n_step_records": 9.521739130434783, "aux_brier/std_r": 0.19528335122310597, "calib/answer_extract_rate": 0.203125, "calib/auroc": 0.46794871794871795, "calib/avg_num_step_conf": 0.859375, "calib/ece": 0.5763421052631579, "calib/final_conf_rate": 0.1484375, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.6842105263157895, "calib/gap": -0.008628205128205169, "calib/mean_conf": 0.8242368421052632, "calib/mu_c": 0.8183333333333334, "calib/mu_w": 0.8269615384615385, "calib/nonempty_final_conf_rate": 0.1484375, "calib/nonempty_reasoning_rate": 0.265625, "calib/nonempty_step_conf_rate": 0.19140625, "calib/pce": 0.5423947368421053, "calib/std_conf": 0.27736157611647866, "calib/step_conf_rate": 0.19140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 698.015625, "completions/mean_terminated_length": 766.91845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.014933333333333333, "grad_norm": 0.4226285517215729, "learning_rate": 3.5e-06, "loss": 0.0676, "num_tokens": 3809817.0, "reward": 0.1168142557144165, "reward_std": 0.22101931273937225, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.05319453030824661, "rewards/format_reward_step": 0.11328125, "step": 14 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -3.0660661435681175e-07, "aux_brier/mean_r": 0.404715562860171, "aux_brier/n_active_tok": 32.833333333333336, "aux_brier/n_step_records": 8.208333333333334, "aux_brier/std_r": 0.20035543335446468, "calib/answer_extract_rate": 0.19140625, "calib/auroc": 0.3619047619047619, "calib/avg_num_step_conf": 0.78515625, "calib/ece": 0.7734146341463415, "calib/final_conf_rate": 0.16015625, "calib/format_rate": 0.12109375, "calib/frac_conf_gt_0.9": 0.8536585365853658, "calib/gap": -0.0024285714285714466, "calib/mean_conf": 0.9070731707317075, "calib/mu_c": 0.9049999999999999, "calib/mu_w": 0.9074285714285714, "calib/nonempty_final_conf_rate": 0.16015625, "calib/nonempty_reasoning_rate": 0.2265625, "calib/nonempty_step_conf_rate": 0.16796875, "calib/pce": 0.7670731707317073, "calib/std_conf": 0.1874116885028984, "calib/step_conf_rate": 0.16796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 656.3671875, "completions/mean_terminated_length": 685.8367309570312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.18445859849452972, "learning_rate": 3.7500000000000005e-06, "loss": 0.0866, "num_tokens": 4085727.0, "reward": 0.09110702574253082, "reward_std": 0.17865130305290222, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.028490623459219933, "rewards/format_reward_step": 0.12109375, "step": 15 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -6.621526758454621e-06, "aux_brier/mean_r": 0.5042844500679237, "aux_brier/n_active_tok": 35.23076923076923, "aux_brier/n_step_records": 8.807692307692308, "aux_brier/std_r": 0.2048667217963017, "calib/answer_extract_rate": 0.21875, "calib/auroc": 0.3823529411764706, "calib/avg_num_step_conf": 0.9140625, "calib/ece": 0.7242553191489363, "calib/final_conf_rate": 0.18359375, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.6808510638297872, "calib/gap": -0.1997511312217195, "calib/mean_conf": 0.886808510638298, "calib/mu_c": 0.7423076923076923, "calib/mu_w": 0.9420588235294118, "calib/nonempty_final_conf_rate": 0.18359375, "calib/nonempty_reasoning_rate": 0.25390625, "calib/nonempty_step_conf_rate": 0.203125, "calib/pce": 0.6672340425531915, "calib/std_conf": 0.210390785161234, "calib/step_conf_rate": 0.203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 643.0234375, "completions/mean_terminated_length": 697.5169677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.18127691745758057, "learning_rate": 4.000000000000001e-06, "loss": 0.1015, "num_tokens": 4359189.0, "reward": 0.142335444688797, "reward_std": 0.24648767709732056, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.04590429738163948, "rewards/format_reward_step": 0.16015625, "step": 16 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.7248619893495132e-08, "aux_brier/mean_r": 0.4430529158562422, "aux_brier/n_active_tok": 44.0, "aux_brier/n_step_records": 11.0, "aux_brier/std_r": 0.2100737655418925, "calib/answer_extract_rate": 0.28515625, "calib/auroc": 0.44, "calib/avg_num_step_conf": 1.37890625, "calib/ece": 0.6570769230769231, "calib/final_conf_rate": 0.25390625, "calib/format_rate": 0.21484375, "calib/frac_conf_gt_0.9": 0.7384615384615385, "calib/gap": 0.004133333333333433, "calib/mean_conf": 0.8721538461538463, "calib/mu_c": 0.8753333333333333, "calib/mu_w": 0.8711999999999999, "calib/nonempty_final_conf_rate": 0.25390625, "calib/nonempty_reasoning_rate": 0.32421875, "calib/nonempty_step_conf_rate": 0.26953125, "calib/pce": 0.6492307692307693, "calib/std_conf": 0.2245781844853715, "calib/step_conf_rate": 0.26953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 592.19921875, "completions/mean_terminated_length": 613.77734375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.018133333333333335, "grad_norm": 0.13673274219036102, "learning_rate": 4.25e-06, "loss": 0.1005, "num_tokens": 4614320.0, "reward": 0.19617079198360443, "reward_std": 0.2713271975517273, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.07374569773674011, "rewards/format_reward_step": 0.21484375, "step": 17 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 6.56118595898647e-08, "aux_brier/mean_r": 0.5370493102073669, "aux_brier/n_active_tok": 38.4, "aux_brier/n_step_records": 9.6, "aux_brier/std_r": 0.20041450768709182, "calib/answer_extract_rate": 0.23828125, "calib/auroc": 0.5308823529411765, "calib/avg_num_step_conf": 0.953125, "calib/ece": 0.708409090909091, "calib/final_conf_rate": 0.171875, "calib/format_rate": 0.15234375, "calib/frac_conf_gt_0.9": 0.7045454545454546, "calib/gap": -0.03429411764705881, "calib/mean_conf": 0.9025000000000002, "calib/mu_c": 0.876, "calib/mu_w": 0.9102941176470588, "calib/nonempty_final_conf_rate": 0.171875, "calib/nonempty_reasoning_rate": 0.27734375, "calib/nonempty_step_conf_rate": 0.203125, "calib/pce": 0.6918181818181819, "calib/std_conf": 0.13600008355612406, "calib/step_conf_rate": 0.203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 600.43359375, "completions/mean_terminated_length": 643.1422729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0192, "grad_norm": 0.48188310861587524, "learning_rate": 4.5e-06, "loss": 0.0511, "num_tokens": 4878751.0, "reward": 0.13276375830173492, "reward_std": 0.21671216189861298, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.05449257791042328, "rewards/format_reward_step": 0.15234375, "step": 18 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.979131159033812e-09, "aux_brier/mean_r": 0.5055194477881154, "aux_brier/n_active_tok": 66.45161290322581, "aux_brier/n_step_records": 16.612903225806452, "aux_brier/std_r": 0.22263317729436582, "calib/answer_extract_rate": 0.453125, "calib/auroc": 0.30747126436781613, "calib/avg_num_step_conf": 2.06640625, "calib/ece": 0.7203961904761905, "calib/final_conf_rate": 0.41015625, "calib/format_rate": 0.34765625, "calib/frac_conf_gt_0.9": 0.7238095238095238, "calib/gap": -0.028485823754789252, "calib/mean_conf": 0.8918247619047619, "calib/mu_c": 0.8682222222222222, "calib/mu_w": 0.8967080459770115, "calib/nonempty_final_conf_rate": 0.41015625, "calib/nonempty_reasoning_rate": 0.51953125, "calib/nonempty_step_conf_rate": 0.421875, "calib/pce": 0.7203961904761905, "calib/std_conf": 0.17331358135497102, "calib/step_conf_rate": 0.421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 475.53515625, "completions/mean_terminated_length": 492.86236572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.020266666666666665, "grad_norm": 0.3311012387275696, "learning_rate": 4.75e-06, "loss": 0.1153, "num_tokens": 5105248.0, "reward": 0.27182435989379883, "reward_std": 0.39847370982170105, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.1107349842786789, "rewards/format_reward_step": 0.34765625, "step": 19 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.4849104437201445e-08, "aux_brier/mean_r": 0.5030051426541421, "aux_brier/n_active_tok": 77.29032258064517, "aux_brier/n_step_records": 19.322580645161292, "aux_brier/std_r": 0.26265350657124675, "calib/answer_extract_rate": 0.51953125, "calib/auroc": 0.5727083333333334, "calib/avg_num_step_conf": 2.3984375, "calib/ece": 0.6617107438016527, "calib/final_conf_rate": 0.47265625, "calib/format_rate": 0.38671875, "calib/frac_conf_gt_0.9": 0.6446280991735537, "calib/gap": 0.07405916666666634, "calib/mean_conf": 0.8683223140495869, "calib/mu_c": 0.9270799999999998, "calib/mu_w": 0.8530208333333335, "calib/nonempty_final_conf_rate": 0.47265625, "calib/nonempty_reasoning_rate": 0.625, "calib/nonempty_step_conf_rate": 0.5234375, "calib/pce": 0.6617107438016527, "calib/std_conf": 0.2052438265699078, "calib/step_conf_rate": 0.5234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2799.0, "completions/max_terminated_length": 2799.0, "completions/mean_length": 443.4609375, "completions/mean_terminated_length": 455.9277038574219, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.021333333333333333, "grad_norm": 0.19372797012329102, "learning_rate": 5e-06, "loss": 0.1248, "num_tokens": 5323646.0, "reward": 0.3269345164299011, "reward_std": 0.4126846492290497, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.14367558062076569, "rewards/format_reward_step": 0.38671875, "step": 20 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.5966909414365524e-08, "aux_brier/mean_r": 0.459222630597651, "aux_brier/n_active_tok": 106.5, "aux_brier/n_step_records": 26.625, "aux_brier/std_r": 0.2647970230318606, "calib/answer_extract_rate": 0.66796875, "calib/auroc": 0.4911543564794339, "calib/avg_num_step_conf": 3.41796875, "calib/ece": 0.6863963906581743, "calib/final_conf_rate": 0.61328125, "calib/format_rate": 0.55078125, "calib/frac_conf_gt_0.9": 0.7197452229299363, "calib/gap": -0.05793426949727243, "calib/mean_conf": 0.8842830148619958, "calib/mu_c": 0.840371052631579, "calib/mu_w": 0.8983053221288514, "calib/nonempty_final_conf_rate": 0.61328125, "calib/nonempty_reasoning_rate": 0.75390625, "calib/nonempty_step_conf_rate": 0.6796875, "calib/pce": 0.6643205944798303, "calib/std_conf": 0.19766529344420242, "calib/step_conf_rate": 0.6796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 453.3671875, "completions/mean_terminated_length": 467.9919128417969, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0224, "grad_norm": 0.07202180474996567, "learning_rate": 4.9722222222222224e-06, "loss": 0.1197, "num_tokens": 5542668.0, "reward": 0.47843945026397705, "reward_std": 0.46075934171676636, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.1871953159570694, "rewards/format_reward_step": 0.55078125, "step": 21 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.0624713752712704e-08, "aux_brier/mean_r": 0.5024625388905406, "aux_brier/n_active_tok": 124.125, "aux_brier/n_step_records": 31.03125, "aux_brier/std_r": 0.24132200283929706, "calib/answer_extract_rate": 0.69921875, "calib/auroc": 0.4243589743589744, "calib/avg_num_step_conf": 4.12109375, "calib/ece": 0.631592380952381, "calib/final_conf_rate": 0.68359375, "calib/format_rate": 0.59765625, "calib/frac_conf_gt_0.9": 0.6685714285714286, "calib/gap": -0.010744729344729143, "calib/mean_conf": 0.878944761904762, "calib/mu_c": 0.870962962962963, "calib/mu_w": 0.8817076923076922, "calib/nonempty_final_conf_rate": 0.68359375, "calib/nonempty_reasoning_rate": 0.81640625, "calib/nonempty_step_conf_rate": 0.73828125, "calib/pce": 0.626697142857143, "calib/std_conf": 0.1736974969330771, "calib/step_conf_rate": 0.73828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2448.0, "completions/max_terminated_length": 2448.0, "completions/mean_length": 379.83203125, "completions/mean_terminated_length": 382.8228454589844, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.023466666666666667, "grad_norm": 0.2948322594165802, "learning_rate": 4.944444444444445e-06, "loss": 0.158, "num_tokens": 5741721.0, "reward": 0.5376076102256775, "reward_std": 0.5166065692901611, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.23636791110038757, "rewards/format_reward_step": 0.59765625, "step": 22 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.414528118057426e-09, "aux_brier/mean_r": 0.4795295763760805, "aux_brier/n_active_tok": 113.125, "aux_brier/n_step_records": 28.28125, "aux_brier/std_r": 0.2549027865752578, "calib/answer_extract_rate": 0.75, "calib/auroc": 0.5606639839034205, "calib/avg_num_step_conf": 3.62890625, "calib/ece": 0.6868875706214687, "calib/final_conf_rate": 0.69140625, "calib/format_rate": 0.6015625, "calib/frac_conf_gt_0.9": 0.7288135593220338, "calib/gap": 0.06581519114688117, "calib/mean_conf": 0.8846276836158191, "calib/mu_c": 0.9374285714285713, "calib/mu_w": 0.8716133802816901, "calib/nonempty_final_conf_rate": 0.69140625, "calib/nonempty_reasoning_rate": 0.859375, "calib/nonempty_step_conf_rate": 0.77734375, "calib/pce": 0.6868875706214687, "calib/std_conf": 0.19784997561141277, "calib/step_conf_rate": 0.77734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2188.0, "completions/max_terminated_length": 2188.0, "completions/mean_length": 348.171875, "completions/mean_terminated_length": 353.69842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.024533333333333334, "grad_norm": 0.32816872000694275, "learning_rate": 4.9166666666666665e-06, "loss": 0.0558, "num_tokens": 5934789.0, "reward": 0.5142276287078857, "reward_std": 0.4823266267776489, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.21316054463386536, "rewards/format_reward_step": 0.6015625, "step": 23 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.0465101713849094e-08, "aux_brier/mean_r": 0.4993862584233284, "aux_brier/n_active_tok": 137.125, "aux_brier/n_step_records": 34.28125, "aux_brier/std_r": 0.27149031683802605, "calib/answer_extract_rate": 0.7890625, "calib/auroc": 0.5002932551319649, "calib/avg_num_step_conf": 4.3359375, "calib/ece": 0.7444795918367347, "calib/final_conf_rate": 0.765625, "calib/format_rate": 0.66796875, "calib/frac_conf_gt_0.9": 0.6989795918367347, "calib/gap": -0.003604692082111338, "calib/mean_conf": 0.893357142857143, "calib/mu_c": 0.8903225806451613, "calib/mu_w": 0.8939272727272727, "calib/nonempty_final_conf_rate": 0.765625, "calib/nonempty_reasoning_rate": 0.90234375, "calib/nonempty_step_conf_rate": 0.81640625, "calib/pce": 0.7398367346938776, "calib/std_conf": 0.17031371114529104, "calib/step_conf_rate": 0.81640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 349.859375, "completions/mean_terminated_length": 351.23138427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.0256, "grad_norm": 0.4393552243709564, "learning_rate": 4.888888888888889e-06, "loss": 0.0603, "num_tokens": 6128865.0, "reward": 0.5113178491592407, "reward_std": 0.4203681945800781, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.1937090903520584, "rewards/format_reward_step": 0.66796875, "step": 24 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.080888460848085e-08, "aux_brier/mean_r": 0.46861199755221605, "aux_brier/n_active_tok": 149.625, "aux_brier/n_step_records": 37.40625, "aux_brier/std_r": 0.24172480730339885, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5606768400290667, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.6441814159292036, "calib/final_conf_rate": 0.8828125, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.6858407079646017, "calib/gap": 0.034937714107754814, "calib/mean_conf": 0.8891371681415929, "calib/mu_c": 0.9152631578947369, "calib/mu_w": 0.8803254437869821, "calib/nonempty_final_conf_rate": 0.8828125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.6405530973451328, "calib/std_conf": 0.17658599584413187, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 294.16796875, "completions/mean_terminated_length": 295.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.02666666666666667, "grad_norm": 0.24862326681613922, "learning_rate": 4.861111111111111e-06, "loss": 0.0723, "num_tokens": 6307396.0, "reward": 0.7160451412200928, "reward_std": 0.5090502500534058, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.31730538606643677, "rewards/format_reward_step": 0.8203125, "step": 25 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.9125422205955598e-08, "aux_brier/mean_r": 0.4840311659500003, "aux_brier/n_active_tok": 146.25, "aux_brier/n_step_records": 36.5625, "aux_brier/std_r": 0.2474837265908718, "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.4972375690607735, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.7192286363636364, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.796875, "calib/frac_conf_gt_0.9": 0.7318181818181818, "calib/gap": 0.03853496245927179, "calib/mean_conf": 0.8965013636363637, "calib/mu_c": 0.9282051282051282, "calib/mu_w": 0.8896701657458564, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.7192286363636364, "calib/std_conf": 0.16700126822587183, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2855.0, "completions/max_terminated_length": 2855.0, "completions/mean_length": 304.125, "completions/mean_terminated_length": 306.5196838378906, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.027733333333333332, "grad_norm": 0.8835781812667847, "learning_rate": 4.833333333333333e-06, "loss": 0.0319, "num_tokens": 6490492.0, "reward": 0.6220261454582214, "reward_std": 0.448641836643219, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.2537296414375305, "rewards/format_reward_step": 0.796875, "step": 26 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.601770781457402e-08, "aux_brier/mean_r": 0.5215900270268321, "aux_brier/n_active_tok": 160.375, "aux_brier/n_step_records": 40.09375, "aux_brier/std_r": 0.24954669224098325, "calib/answer_extract_rate": 0.91015625, "calib/auroc": 0.5441506051098162, "calib/avg_num_step_conf": 5.09765625, "calib/ece": 0.6770626388888888, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.6625, "calib/gap": 0.03545814283579851, "calib/mean_conf": 0.8687293055555556, "calib/mu_c": 0.897391304347826, "calib/mu_w": 0.8619331615120275, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.6770626388888888, "calib/std_conf": 0.18192718188034782, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2099.0, "completions/max_terminated_length": 2099.0, "completions/mean_length": 300.140625, "completions/mean_terminated_length": 300.140625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.0288, "grad_norm": 0.17994718253612518, "learning_rate": 4.805555555555556e-06, "loss": 0.0655, "num_tokens": 6672544.0, "reward": 0.6840643286705017, "reward_std": 0.42421311140060425, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.29875725507736206, "rewards/format_reward_step": 0.859375, "step": 27 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.0590220523318834e-08, "aux_brier/mean_r": 0.500780388712883, "aux_brier/n_active_tok": 147.0, "aux_brier/n_step_records": 36.75, "aux_brier/std_r": 0.24309765687212348, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.4769244884851821, "calib/avg_num_step_conf": 4.61328125, "calib/ece": 0.6230042372881356, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.8671875, "calib/frac_conf_gt_0.9": 0.6652542372881356, "calib/gap": -0.005542435085787667, "calib/mean_conf": 0.8834279661016948, "calib/mu_c": 0.8793650793650793, "calib/mu_w": 0.884907514450867, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.6197415254237287, "calib/std_conf": 0.16116088097575107, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2786.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 290.82421875, "completions/mean_terminated_length": 290.82421875, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.029866666666666666, "grad_norm": 1.0136277675628662, "learning_rate": 4.777777777777778e-06, "loss": 0.064, "num_tokens": 6853939.0, "reward": 0.771499752998352, "reward_std": 0.4849082827568054, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.35162419080734253, "rewards/format_reward_step": 0.8671875, "step": 28 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.683060218971022e-09, "aux_brier/mean_r": 0.4772613560780883, "aux_brier/n_active_tok": 159.0, "aux_brier/n_step_records": 39.75, "aux_brier/std_r": 0.24786118045449257, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4210186513629842, "calib/avg_num_step_conf": 5.03515625, "calib/ece": 0.7571507573221756, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.7364016736401674, "calib/gap": -0.0068561053084648105, "calib/mean_conf": 0.8994101715481172, "calib/mu_c": 0.8935294117647059, "calib/mu_w": 0.9003855170731707, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.7571507573221756, "calib/std_conf": 0.15954339212531934, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2037.0, "completions/max_terminated_length": 2037.0, "completions/mean_length": 298.56640625, "completions/mean_terminated_length": 298.56640625, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 0.030933333333333334, "grad_norm": 0.10229931026697159, "learning_rate": 4.75e-06, "loss": 0.0563, "num_tokens": 7037500.0, "reward": 0.6297242045402527, "reward_std": 0.36027663946151733, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.23764701187610626, "rewards/format_reward_step": 0.875, "step": 29 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.570245974387042e-08, "aux_brier/mean_r": 0.49467434268444777, "aux_brier/n_active_tok": 157.5, "aux_brier/n_step_records": 39.375, "aux_brier/std_r": 0.25863818172365427, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5382694541231127, "calib/avg_num_step_conf": 4.98828125, "calib/ece": 0.7196153846153844, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.7004048582995951, "calib/gap": 0.03159233449477361, "calib/mean_conf": 0.889493927125506, "calib/mu_c": 0.9157142857142857, "calib/mu_w": 0.8841219512195121, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.7195344129554654, "calib/std_conf": 0.17370995581297372, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2564.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 310.22265625, "completions/mean_terminated_length": 310.22265625, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.032, "grad_norm": 0.9310511350631714, "learning_rate": 4.722222222222222e-06, "loss": 0.0003, "num_tokens": 7223901.0, "reward": 0.695044755935669, "reward_std": 0.37188243865966797, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.2958042025566101, "rewards/format_reward_step": 0.9140625, "step": 30 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.1022978224639601e-09, "aux_brier/mean_r": 0.4644114878028631, "aux_brier/n_active_tok": 163.25, "aux_brier/n_step_records": 40.8125, "aux_brier/std_r": 0.24233023542910814, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.49856701940035275, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.7009071729957806, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.729957805907173, "calib/gap": 0.03461309523809519, "calib/mean_conf": 0.9034388185654009, "calib/mu_c": 0.9310416666666667, "calib/mu_w": 0.8964285714285715, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.7009071729957806, "calib/std_conf": 0.14707015408666976, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2278.0, "completions/max_terminated_length": 2278.0, "completions/mean_length": 301.1171875, "completions/mean_terminated_length": 301.1171875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.03306666666666667, "grad_norm": 0.325433611869812, "learning_rate": 4.694444444444445e-06, "loss": 0.038, "num_tokens": 7406899.0, "reward": 0.7081201076507568, "reward_std": 0.3796444237232208, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.29341787099838257, "rewards/format_reward_step": 0.88671875, "step": 31 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8724363379241105e-09, "aux_brier/mean_r": 0.5441410019993782, "aux_brier/n_active_tok": 166.875, "aux_brier/n_step_records": 41.71875, "aux_brier/std_r": 0.253931765910238, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.44706806282722505, "calib/avg_num_step_conf": 5.30859375, "calib/ece": 0.6712033195020747, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.6224066390041494, "calib/gap": 0.008658638743455405, "calib/mean_conf": 0.8749377593360996, "calib/mu_c": 0.8817999999999999, "calib/mu_w": 0.8731413612565445, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6693360995850622, "calib/std_conf": 0.1636642660348378, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1950.0, "completions/max_terminated_length": 1950.0, "completions/mean_length": 300.421875, "completions/mean_terminated_length": 301.6000061035156, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.034133333333333335, "grad_norm": 0.19576984643936157, "learning_rate": 4.666666666666667e-06, "loss": 0.0616, "num_tokens": 7590511.0, "reward": 0.7490607500076294, "reward_std": 0.3445599675178528, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.3399929702281952, "rewards/format_reward_step": 0.9296875, "step": 32 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.4268494991375178e-08, "aux_brier/mean_r": 0.5107473731040955, "aux_brier/n_active_tok": 161.625, "aux_brier/n_step_records": 40.40625, "aux_brier/std_r": 0.2583832503296435, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5713531894934334, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.6868281124497991, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.6104417670682731, "calib/gap": 0.0499005393996248, "calib/mean_conf": 0.8514867469879518, "calib/mu_c": 0.893170731707317, "calib/mu_w": 0.8432701923076922, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.6868281124497991, "calib/std_conf": 0.20701952303279791, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2055.0, "completions/max_terminated_length": 2055.0, "completions/mean_length": 290.37109375, "completions/mean_terminated_length": 291.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.0352, "grad_norm": 0.48981305956840515, "learning_rate": 4.638888888888889e-06, "loss": 0.0255, "num_tokens": 7771718.0, "reward": 0.7095664739608765, "reward_std": 0.30987101793289185, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.3382660150527954, "rewards/format_reward_step": 0.9296875, "step": 33 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.904765679379029e-08, "aux_brier/mean_r": 0.583881065249443, "aux_brier/n_active_tok": 164.375, "aux_brier/n_step_records": 41.09375, "aux_brier/std_r": 0.25083809066563845, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5058752775721689, "calib/avg_num_step_conf": 5.15625, "calib/ece": 0.6248995983935743, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.5140562248995983, "calib/gap": 0.0061278682457437395, "calib/mean_conf": 0.8393574297188755, "calib/mu_c": 0.8441071428571428, "calib/mu_w": 0.8379792746113991, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6196787148594378, "calib/std_conf": 0.18964665081059243, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1984.0, "completions/max_terminated_length": 1984.0, "completions/mean_length": 267.8203125, "completions/mean_terminated_length": 267.8203125, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.03626666666666667, "grad_norm": 0.8077874183654785, "learning_rate": 4.611111111111112e-06, "loss": 0.0529, "num_tokens": 7945392.0, "reward": 0.8077145218849182, "reward_std": 0.39487701654434204, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.4027332067489624, "rewards/format_reward_step": 0.96875, "step": 34 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.6753998905171272e-08, "aux_brier/mean_r": 0.5733567727729678, "aux_brier/n_active_tok": 180.125, "aux_brier/n_step_records": 45.03125, "aux_brier/std_r": 0.2474320181645453, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5484252900781434, "calib/avg_num_step_conf": 5.68359375, "calib/ece": 0.6853036437246964, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.5587044534412956, "calib/gap": 0.045822874733601804, "calib/mean_conf": 0.8512955465587045, "calib/mu_c": 0.8895121951219513, "calib/mu_w": 0.8436893203883495, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6853036437246964, "calib/std_conf": 0.1877760901197293, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 306.234375, "completions/mean_terminated_length": 306.234375, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.037333333333333336, "grad_norm": 1.0027748346328735, "learning_rate": 4.583333333333333e-06, "loss": 0.0344, "num_tokens": 8133044.0, "reward": 0.7308730483055115, "reward_std": 0.3583698272705078, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.3531796932220459, "rewards/format_reward_step": 0.94921875, "step": 35 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.42846162255151e-09, "aux_brier/mean_r": 0.6210600854828954, "aux_brier/n_active_tok": 170.125, "aux_brier/n_step_records": 42.53125, "aux_brier/std_r": 0.24454850843176246, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4746660859465738, "calib/avg_num_step_conf": 5.34765625, "calib/ece": 0.47052000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.396, "calib/gap": -0.006585365853658431, "calib/mean_conf": 0.7828399999999999, "calib/mu_c": 0.7784146341463415, "calib/mu_w": 0.7849999999999999, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.46268000000000004, "calib/std_conf": 0.21339244222792897, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1896.0, "completions/max_terminated_length": 1896.0, "completions/mean_length": 293.53515625, "completions/mean_terminated_length": 293.53515625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.0384, "grad_norm": 0.18656954169273376, "learning_rate": 4.555555555555556e-06, "loss": 0.0579, "num_tokens": 8310901.0, "reward": 0.9289214611053467, "reward_std": 0.4526945948600769, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.5047484636306763, "rewards/format_reward_step": 0.96484375, "step": 36 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.624792260624842e-08, "aux_brier/mean_r": 0.6186469784006476, "aux_brier/n_active_tok": 161.125, "aux_brier/n_step_records": 40.28125, "aux_brier/std_r": 0.2561876643449068, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5631727107136943, "calib/avg_num_step_conf": 5.05078125, "calib/ece": 0.5173092369477912, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.39357429718875503, "calib/gap": 0.07312220566318905, "calib/mean_conf": 0.7655020080321285, "calib/mu_c": 0.819242424242424, "calib/mu_w": 0.7461202185792349, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.5088755020080322, "calib/std_conf": 0.24703187646118807, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1983.0, "completions/max_terminated_length": 1983.0, "completions/mean_length": 280.95703125, "completions/mean_terminated_length": 280.95703125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.039466666666666664, "grad_norm": 0.3276523947715759, "learning_rate": 4.527777777777778e-06, "loss": 0.0233, "num_tokens": 8489922.0, "reward": 0.8597831726074219, "reward_std": 0.3994288146495819, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.501632809638977, "rewards/format_reward_step": 0.953125, "step": 37 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.8400494062340584e-08, "aux_brier/mean_r": 0.6910385163500905, "aux_brier/n_active_tok": 173.625, "aux_brier/n_step_records": 43.40625, "aux_brier/std_r": 0.235653187148273, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5293839758125471, "calib/avg_num_step_conf": 5.5, "calib/ece": 0.526958, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.284, "calib/gap": 0.020413170823885163, "calib/mean_conf": 0.7199220000000001, "calib/mu_c": 0.735925925925926, "calib/mu_w": 0.7155127551020408, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.51544, "calib/std_conf": 0.25425096836787076, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 284.35546875, "completions/mean_terminated_length": 284.35546875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.04053333333333333, "grad_norm": 0.22487637400627136, "learning_rate": 4.5e-06, "loss": 0.0591, "num_tokens": 8669605.0, "reward": 0.8187369108200073, "reward_std": 0.3844972848892212, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.5015101432800293, "rewards/format_reward_step": 0.96484375, "step": 38 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.716412101337994e-08, "aux_brier/mean_r": 0.7235443480312824, "aux_brier/n_active_tok": 172.875, "aux_brier/n_step_records": 43.21875, "aux_brier/std_r": 0.2324718649033457, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5164703874381293, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.45558232931726916, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.285140562248996, "calib/gap": 0.03605222734254998, "calib/mean_conf": 0.7044979919678714, "calib/mu_c": 0.7314285714285713, "calib/mu_w": 0.6953763440860213, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4535341365461848, "calib/std_conf": 0.25052044020363534, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 287.46484375, "completions/mean_terminated_length": 288.5921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.0416, "grad_norm": 0.10107416659593582, "learning_rate": 4.472222222222223e-06, "loss": -0.001, "num_tokens": 8849284.0, "reward": 0.8515971899032593, "reward_std": 0.3746635913848877, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.5313886404037476, "rewards/format_reward_step": 0.9453125, "step": 39 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.759384257200459e-08, "aux_brier/mean_r": 0.7821152247488499, "aux_brier/n_active_tok": 168.25, "aux_brier/n_step_records": 42.0625, "aux_brier/std_r": 0.19819560437463224, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47432950191570883, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.4702016129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.1774193548387097, "calib/gap": -0.018691844553913484, "calib/mean_conf": 0.6364112903225807, "calib/mu_c": 0.6211111111111112, "calib/mu_w": 0.6398029556650247, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4625806451612904, "calib/std_conf": 0.26290181265699764, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 276.796875, "completions/mean_terminated_length": 276.796875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.042666666666666665, "grad_norm": 0.2239619940519333, "learning_rate": 4.444444444444444e-06, "loss": -0.006, "num_tokens": 9026904.0, "reward": 0.792715311050415, "reward_std": 0.3543362021446228, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.5458613634109497, "rewards/format_reward_step": 0.9609375, "step": 40 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.5530835445763955e-08, "aux_brier/mean_r": 0.796193016692996, "aux_brier/n_active_tok": 164.75, "aux_brier/n_step_records": 41.1875, "aux_brier/std_r": 0.18959426786750555, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5042929793276534, "calib/avg_num_step_conf": 5.16796875, "calib/ece": 0.27724000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.164, "calib/gap": 0.0055921009180369685, "calib/mean_conf": 0.5824400000000001, "calib/mu_c": 0.5857281553398057, "calib/mu_w": 0.5801360544217687, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.22384000000000004, "calib/std_conf": 0.2798400371640913, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2401.0, "completions/max_terminated_length": 2401.0, "completions/mean_length": 297.73828125, "completions/mean_terminated_length": 297.73828125, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 0.04373333333333333, "grad_norm": 0.06125990301370621, "learning_rate": 4.416666666666667e-06, "loss": 0.0813, "num_tokens": 9210373.0, "reward": 1.0402311086654663, "reward_std": 0.43915098905563354, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6296742558479309, "rewards/format_reward_step": 0.9609375, "step": 41 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.5605052118323783e-08, "aux_brier/mean_r": 0.8279183488339186, "aux_brier/n_active_tok": 162.875, "aux_brier/n_step_records": 40.71875, "aux_brier/std_r": 0.18691047129686922, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5667389906782384, "calib/avg_num_step_conf": 5.1171875, "calib/ece": 0.2761075697211155, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.099601593625498, "calib/gap": 0.06970475731276105, "calib/mean_conf": 0.5225617529880479, "calib/mu_c": 0.5733823529411763, "calib/mu_w": 0.5036775956284153, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26387649402390434, "calib/std_conf": 0.28826297887753355, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1736.0, "completions/max_terminated_length": 1736.0, "completions/mean_length": 245.7890625, "completions/mean_terminated_length": 245.7890625, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.0448, "grad_norm": 0.2336869090795517, "learning_rate": 4.388888888888889e-06, "loss": -0.0032, "num_tokens": 9377663.0, "reward": 0.920749306678772, "reward_std": 0.3561636507511139, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.6673721075057983, "rewards/format_reward_step": 0.96875, "step": 42 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.07011273342561e-07, "aux_brier/mean_r": 0.8887287676334381, "aux_brier/n_active_tok": 169.75, "aux_brier/n_step_records": 42.4375, "aux_brier/std_r": 0.13937835465185344, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5863016050124453, "calib/avg_num_step_conf": 5.3203125, "calib/ece": 0.25392857142857145, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": 0.07789631791262563, "calib/mean_conf": 0.469484126984127, "calib/mu_c": 0.5285245901639345, "calib/mu_w": 0.45062827225130886, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24067460317460315, "calib/std_conf": 0.27795049902235747, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2016.0, "completions/max_terminated_length": 2016.0, "completions/mean_length": 295.72265625, "completions/mean_terminated_length": 295.72265625, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.04586666666666667, "grad_norm": 0.09244558215141296, "learning_rate": 4.361111111111112e-06, "loss": 0.0354, "num_tokens": 9558592.0, "reward": 0.9044816493988037, "reward_std": 0.31629693508148193, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7038640975952148, "rewards/format_reward_step": 0.98046875, "step": 43 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.058977593977218e-08, "aux_brier/mean_r": 0.9013891965150833, "aux_brier/n_active_tok": 182.625, "aux_brier/n_step_records": 45.65625, "aux_brier/std_r": 0.1366663781227544, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.44288488700564965, "calib/avg_num_step_conf": 5.71875, "calib/ece": 0.2836653386454183, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.07569721115537849, "calib/gap": -0.057367584745762756, "calib/mean_conf": 0.42557768924302786, "calib/mu_c": 0.3816949152542373, "calib/mu_w": 0.4390625000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23709163346613543, "calib/std_conf": 0.2705343041121279, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1592.0, "completions/max_terminated_length": 1592.0, "completions/mean_length": 309.3046875, "completions/mean_terminated_length": 309.3046875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.046933333333333334, "grad_norm": 0.026423679664731026, "learning_rate": 4.333333333333334e-06, "loss": 0.0495, "num_tokens": 9744094.0, "reward": 0.8851389288902283, "reward_std": 0.2901378273963928, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.6733683347702026, "rewards/format_reward_step": 0.97265625, "step": 44 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.3169754138619982e-07, "aux_brier/mean_r": 0.9033907111734152, "aux_brier/n_active_tok": 167.875, "aux_brier/n_step_records": 41.96875, "aux_brier/std_r": 0.13565030810423195, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5077584148961566, "calib/avg_num_step_conf": 5.24609375, "calib/ece": 0.20056451612903223, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.016129032258064516, "calib/gap": -0.0029776398504018364, "calib/mean_conf": 0.3462096774193549, "calib/mu_c": 0.3440845070422536, "calib/mu_w": 0.3470621468926554, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13024193548387097, "calib/std_conf": 0.25285337055914786, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2129.0, "completions/max_terminated_length": 2129.0, "completions/mean_length": 311.640625, "completions/mean_terminated_length": 311.640625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.048, "grad_norm": 0.056766387075185776, "learning_rate": 4.305555555555556e-06, "loss": 0.0236, "num_tokens": 9928922.0, "reward": 0.9266473054885864, "reward_std": 0.39333993196487427, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.6909644603729248, "rewards/format_reward_step": 0.953125, "step": 45 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.73603463794847e-07, "aux_brier/mean_r": 0.9203233141452074, "aux_brier/n_active_tok": 184.625, "aux_brier/n_step_records": 46.15625, "aux_brier/std_r": 0.1193378433235921, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.43169398907103823, "calib/avg_num_step_conf": 5.7890625, "calib/ece": 0.24682142857142858, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": -0.06515633167023044, "calib/mean_conf": 0.33992460317460316, "calib/mu_c": 0.29260869565217396, "calib/mu_w": 0.3577650273224044, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15646825396825398, "calib/std_conf": 0.25365768371132563, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 312.875, "completions/mean_terminated_length": 312.875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.04906666666666667, "grad_norm": 0.23408041894435883, "learning_rate": 4.277777777777778e-06, "loss": 0.0456, "num_tokens": 10113786.0, "reward": 0.9338980913162231, "reward_std": 0.30101126432418823, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.6887174844741821, "rewards/format_reward_step": 0.9765625, "step": 46 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.192369668561737e-07, "aux_brier/mean_r": 0.9377381801605225, "aux_brier/n_active_tok": 190.5, "aux_brier/n_step_records": 47.625, "aux_brier/std_r": 0.0988645100442227, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5192012908430819, "calib/avg_num_step_conf": 5.984375, "calib/ece": 0.1789285714285714, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03968253968253968, "calib/gap": 0.014729326341266669, "calib/mean_conf": 0.27575396825396825, "calib/mu_c": 0.2865671641791045, "calib/mu_w": 0.2718378378378378, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09440476190476191, "calib/std_conf": 0.23050975899555634, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1042.0, "completions/max_terminated_length": 1042.0, "completions/mean_length": 320.44140625, "completions/mean_terminated_length": 321.69805908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.050133333333333335, "grad_norm": 0.15534737706184387, "learning_rate": 4.25e-06, "loss": -0.028, "num_tokens": 10301795.0, "reward": 0.9412600994110107, "reward_std": 0.3168777823448181, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7416031360626221, "rewards/format_reward_step": 0.98046875, "step": 47 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.0683897054027973e-07, "aux_brier/mean_r": 0.9527103621512651, "aux_brier/n_active_tok": 174.75, "aux_brier/n_step_records": 43.6875, "aux_brier/std_r": 0.07518987158255186, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4866093366093366, "calib/avg_num_step_conf": 5.51171875, "calib/ece": 0.15785099601593625, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.0015961998361998797, "calib/mean_conf": 0.22230836653386454, "calib/mu_c": 0.2234848484848485, "calib/mu_w": 0.22188864864864863, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05860557768924303, "calib/std_conf": 0.19427346410229035, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 295.82421875, "completions/mean_terminated_length": 295.82421875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.0512, "grad_norm": 0.02304792031645775, "learning_rate": 4.222222222222223e-06, "loss": 0.0558, "num_tokens": 10481214.0, "reward": 0.937140703201294, "reward_std": 0.2706897556781769, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7485626935958862, "rewards/format_reward_step": 0.9765625, "step": 48 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.166361513262661e-07, "aux_brier/mean_r": 0.946187186986208, "aux_brier/n_active_tok": 196.25, "aux_brier/n_step_records": 49.0625, "aux_brier/std_r": 0.08926301234168932, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5330502476999293, "calib/avg_num_step_conf": 6.29296875, "calib/ece": 0.21672064777327932, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": 0.030697806086341095, "calib/mean_conf": 0.21137651821862352, "calib/mu_c": 0.2308888888888889, "calib/mu_w": 0.2001910828025478, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03186234817813765, "calib/std_conf": 0.19012601812598928, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 336.3984375, "completions/mean_terminated_length": 339.0472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.05226666666666667, "grad_norm": 0.11247003078460693, "learning_rate": 4.194444444444445e-06, "loss": 0.0271, "num_tokens": 10671868.0, "reward": 1.0026288032531738, "reward_std": 0.32875940203666687, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6902027130126953, "rewards/format_reward_step": 0.95703125, "step": 49 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.116142907217295e-08, "aux_brier/mean_r": 0.9486544150859118, "aux_brier/n_active_tok": 230.875, "aux_brier/n_step_records": 57.71875, "aux_brier/std_r": 0.09179092763224617, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4349324145242512, "calib/avg_num_step_conf": 7.234375, "calib/ece": 0.30401190476190476, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": -0.06128107606679034, "calib/mean_conf": 0.2044801587301587, "calib/mu_c": 0.16703061224489796, "calib/mu_w": 0.2283116883116883, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05980158730158729, "calib/std_conf": 0.2061868749074533, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2431.0, "completions/max_terminated_length": 2431.0, "completions/mean_length": 430.69921875, "completions/mean_terminated_length": 430.69921875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.05333333333333334, "grad_norm": 0.02688698098063469, "learning_rate": 4.166666666666667e-06, "loss": 0.1138, "num_tokens": 10887487.0, "reward": 1.0308661460876465, "reward_std": 0.3271676301956177, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6390897035598755, "rewards/format_reward_step": 0.9765625, "step": 50 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.7028624504922263e-07, "aux_brier/mean_r": 0.961110221222043, "aux_brier/n_active_tok": 204.625, "aux_brier/n_step_records": 51.15625, "aux_brier/std_r": 0.07517423389072064, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5988922841864017, "calib/avg_num_step_conf": 6.45703125, "calib/ece": 0.1960263157894737, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.016194331983805668, "calib/gap": 0.02218292589763174, "calib/mean_conf": 0.17239473684210524, "calib/mu_c": 0.18766233766233764, "calib/mu_w": 0.1654794117647059, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.028340080971659923, "calib/std_conf": 0.17546301689012933, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 376.6953125, "completions/mean_terminated_length": 379.6614074707031, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0544, "grad_norm": 0.17208071053028107, "learning_rate": 4.138888888888889e-06, "loss": 0.0557, "num_tokens": 11093217.0, "reward": 0.9575573801994324, "reward_std": 0.2885648310184479, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7130419015884399, "rewards/format_reward_step": 0.95703125, "step": 51 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.3009054812314105e-08, "aux_brier/mean_r": 0.9536644276231527, "aux_brier/n_active_tok": 182.625, "aux_brier/n_step_records": 45.65625, "aux_brier/std_r": 0.09595581494795624, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5271350306155334, "calib/avg_num_step_conf": 5.73046875, "calib/ece": 0.31847222222222205, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005066387367064162, "calib/mean_conf": 0.13049603174603178, "calib/mu_c": 0.13341121495327105, "calib/mu_w": 0.12834482758620688, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.012182539682539682, "calib/std_conf": 0.1386136430794062, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2326.0, "completions/max_terminated_length": 2326.0, "completions/mean_length": 358.49609375, "completions/mean_terminated_length": 358.49609375, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.055466666666666664, "grad_norm": 0.20132383704185486, "learning_rate": 4.111111111111111e-06, "loss": 0.0894, "num_tokens": 11292944.0, "reward": 1.0649170875549316, "reward_std": 0.3719336986541748, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6346686482429504, "rewards/format_reward_step": 0.9765625, "step": 52 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.958885474059542e-07, "aux_brier/mean_r": 0.9550592508167028, "aux_brier/n_active_tok": 199.75, "aux_brier/n_step_records": 49.9375, "aux_brier/std_r": 0.09451603806519415, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.505686485120364, "calib/avg_num_step_conf": 6.30078125, "calib/ece": 0.3805555555555554, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.020843874391862, "calib/mean_conf": 0.14412698412698416, "calib/mu_c": 0.1331260504201681, "calib/mu_w": 0.1539699248120301, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02623015873015873, "calib/std_conf": 0.15765589332602928, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 398.421875, "completions/mean_terminated_length": 398.421875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.05653333333333333, "grad_norm": 0.1364300400018692, "learning_rate": 4.083333333333334e-06, "loss": 0.031, "num_tokens": 11500764.0, "reward": 1.101509928703308, "reward_std": 0.29608970880508423, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5935396552085876, "rewards/format_reward_step": 0.9765625, "step": 53 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.238592622967218e-07, "aux_brier/mean_r": 0.960156099870801, "aux_brier/n_active_tok": 184.75, "aux_brier/n_step_records": 46.1875, "aux_brier/std_r": 0.08469618789968081, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4676079734219269, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.3886274509803922, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.0037246216315983682, "calib/mean_conf": 0.1116078431372549, "calib/mu_c": 0.11349206349206349, "calib/mu_w": 0.10976744186046512, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003058823529411765, "calib/std_conf": 0.133687384600157, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2505.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 332.48046875, "completions/mean_terminated_length": 332.48046875, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.0576, "grad_norm": 0.204210564494133, "learning_rate": 4.055555555555556e-06, "loss": 0.019, "num_tokens": 11692111.0, "reward": 1.1365880966186523, "reward_std": 0.3041144013404846, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5854148864746094, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.125977435096793e-06, "aux_brier/mean_r": 0.9559526536613703, "aux_brier/n_active_tok": 189.625, "aux_brier/n_step_records": 47.40625, "aux_brier/std_r": 0.08679731689335313, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5260857595154519, "calib/avg_num_step_conf": 5.97265625, "calib/ece": 0.2925236220472441, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.013067175992841865, "calib/mean_conf": 0.10905118110236221, "calib/mu_c": 0.10045977011494255, "calib/mu_w": 0.11352694610778441, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.029527559055118113, "calib/std_conf": 0.1356200739828324, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1762.0, "completions/max_terminated_length": 1762.0, "completions/mean_length": 356.78515625, "completions/mean_terminated_length": 358.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.058666666666666666, "grad_norm": 0.06138739734888077, "learning_rate": 4.027777777777779e-06, "loss": 0.0092, "num_tokens": 11891272.0, "reward": 1.006003499031067, "reward_std": 0.28058964014053345, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6880766153335571, "rewards/format_reward_step": 0.98828125, "step": 55 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.937277767405826e-07, "aux_brier/mean_r": 0.9601368252187967, "aux_brier/n_active_tok": 198.625, "aux_brier/n_step_records": 49.65625, "aux_brier/std_r": 0.09625902345578652, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5453077995450877, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.2386956692913386, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.007739371927507524, "calib/mean_conf": 0.09791850393700786, "calib/mu_c": 0.10331168831168831, "calib/mu_w": 0.09557231638418079, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.016732283464566927, "calib/std_conf": 0.1293169244185342, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2104.0, "completions/max_terminated_length": 2104.0, "completions/mean_length": 389.4453125, "completions/mean_terminated_length": 390.9725646972656, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.05973333333333333, "grad_norm": 0.5584831237792969, "learning_rate": 4.000000000000001e-06, "loss": 0.0456, "num_tokens": 12097810.0, "reward": 0.9787373542785645, "reward_std": 0.24299530684947968, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.727449357509613, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.7471648068867474e-06, "aux_brier/mean_r": 0.9713607523590326, "aux_brier/n_active_tok": 194.25, "aux_brier/n_step_records": 48.5625, "aux_brier/std_r": 0.0657299466402037, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4891007287870901, "calib/avg_num_step_conf": 6.07421875, "calib/ece": 0.3910441767068273, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011974882873503373, "calib/mean_conf": 0.07008032128514056, "calib/mu_c": 0.06353982300884957, "calib/mu_w": 0.07551470588235294, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003654618473895582, "calib/std_conf": 0.09132224583657442, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2615.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 392.71484375, "completions/mean_terminated_length": 394.2549133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.0608, "grad_norm": 0.08221420645713806, "learning_rate": 3.972222222222223e-06, "loss": 0.0511, "num_tokens": 12305137.0, "reward": 1.0669505596160889, "reward_std": 0.3339952826499939, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.572489857673645, "rewards/format_reward_step": 0.96484375, "step": 57 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4384147526413926e-06, "aux_brier/mean_r": 0.9715827684849501, "aux_brier/n_active_tok": 227.5, "aux_brier/n_step_records": 56.875, "aux_brier/std_r": 0.0746084921520378, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5481687014428412, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.29920855327868856, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00722200480947096, "calib/mean_conf": 0.06152915163934426, "calib/mu_c": 0.06623529411764706, "calib/mu_w": 0.0590132893081761, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006188524590163935, "calib/std_conf": 0.08642366508624892, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2412.0, "completions/max_terminated_length": 2412.0, "completions/mean_length": 473.35546875, "completions/mean_terminated_length": 480.86907958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.06186666666666667, "grad_norm": 0.06282463669776917, "learning_rate": 3.944444444444445e-06, "loss": 0.0398, "num_tokens": 12532636.0, "reward": 0.9721814393997192, "reward_std": 0.30911701917648315, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.654350757598877, "rewards/format_reward_step": 0.953125, "step": 58 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.87842881334577e-06, "aux_brier/mean_r": 0.9620572626590729, "aux_brier/n_active_tok": 194.75, "aux_brier/n_step_records": 48.6875, "aux_brier/std_r": 0.07372519691853086, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4679160125588697, "calib/avg_num_step_conf": 6.1484375, "calib/ece": 0.36913147410358566, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.0004322344322344185, "calib/mean_conf": 0.06513147410358565, "calib/mu_c": 0.06538461538461539, "calib/mu_w": 0.06495238095238097, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0099601593625498, "calib/std_conf": 0.1323152799648236, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2243.0, "completions/max_terminated_length": 2243.0, "completions/mean_length": 406.8671875, "completions/mean_terminated_length": 408.4627685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.06293333333333333, "grad_norm": 0.13648149371147156, "learning_rate": 3.916666666666667e-06, "loss": 0.037, "num_tokens": 12743042.0, "reward": 1.0425814390182495, "reward_std": 0.25638192892074585, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.600013256072998, "rewards/format_reward_step": 0.97265625, "step": 59 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.848851273333345e-07, "aux_brier/mean_r": 0.9681909456849098, "aux_brier/n_active_tok": 187.5, "aux_brier/n_step_records": 46.875, "aux_brier/std_r": 0.07947756828070851, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47326698695950586, "calib/avg_num_step_conf": 5.9140625, "calib/ece": 0.34575502008032133, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.017937062457103622, "calib/mean_conf": 0.05111244979919679, "calib/mu_c": 0.039946808510638296, "calib/mu_w": 0.05788387096774192, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00967871485943775, "calib/std_conf": 0.09965484311687559, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2884.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 419.39453125, "completions/mean_terminated_length": 421.03924560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.064, "grad_norm": 0.05861761420965195, "learning_rate": 3.88888888888889e-06, "loss": 0.0734, "num_tokens": 12959263.0, "reward": 1.0062377452850342, "reward_std": 0.31737709045410156, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6187013387680054, "rewards/format_reward_step": 0.96875, "step": 60 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.383347991774981e-07, "aux_brier/mean_r": 0.9603624045848846, "aux_brier/n_active_tok": 206.625, "aux_brier/n_step_records": 51.65625, "aux_brier/std_r": 0.08457234721845452, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46875, "calib/avg_num_step_conf": 6.515625, "calib/ece": 0.4553147410358566, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005514989837398378, "calib/mean_conf": 0.048828685258964145, "calib/mu_c": 0.04601626016260163, "calib/mu_w": 0.05153125000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007051792828685259, "calib/std_conf": 0.08277776212398787, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 384.80078125, "completions/mean_terminated_length": 384.80078125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.06506666666666666, "grad_norm": 0.012749969959259033, "learning_rate": 3.861111111111112e-06, "loss": 0.0474, "num_tokens": 13161836.0, "reward": 1.1015645265579224, "reward_std": 0.23647527396678925, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5312580466270447, "rewards/format_reward_step": 0.9765625, "step": 61 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.5184355332850146e-06, "aux_brier/mean_r": 0.9696124047040939, "aux_brier/n_active_tok": 204.5, "aux_brier/n_step_records": 51.125, "aux_brier/std_r": 0.07563763821781322, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5045647093193106, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.30546153846153845, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008953403447268471, "calib/mean_conf": 0.03721052631578947, "calib/mu_c": 0.04311904761904761, "calib/mu_w": 0.03416564417177914, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012955465587044534, "calib/std_conf": 0.06996016948448516, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2187.0, "completions/max_terminated_length": 2187.0, "completions/mean_length": 413.3359375, "completions/mean_terminated_length": 419.8968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.06613333333333334, "grad_norm": 0.593379557132721, "learning_rate": 3.833333333333334e-06, "loss": 0.0366, "num_tokens": 13374730.0, "reward": 0.9733339548110962, "reward_std": 0.308281809091568, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6511483192443848, "rewards/format_reward_step": 0.95703125, "step": 62 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.1979301490772087e-06, "aux_brier/mean_r": 0.9618304893374443, "aux_brier/n_active_tok": 196.5, "aux_brier/n_step_records": 49.125, "aux_brier/std_r": 0.08810602781704802, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.532903395630429, "calib/avg_num_step_conf": 6.25, "calib/ece": 0.4359109311740891, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025671229270860796, "calib/mean_conf": 0.0337246963562753, "calib/mu_c": 0.035086206896551723, "calib/mu_w": 0.032519083969465644, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.052133559379460834, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2898.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 455.8828125, "completions/mean_terminated_length": 459.4724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.0672, "grad_norm": 0.4723755717277527, "learning_rate": 3.8055555555555556e-06, "loss": 0.1104, "num_tokens": 13600076.0, "reward": 1.068447470664978, "reward_std": 0.33388417959213257, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5394150018692017, "rewards/format_reward_step": 0.9609375, "step": 63 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.365504952811964e-06, "aux_brier/mean_r": 0.9629658255726099, "aux_brier/n_active_tok": 196.0, "aux_brier/n_step_records": 49.0, "aux_brier/std_r": 0.06713463319829316, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.54032474503489, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.4112642276422764, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": -0.004680354267310796, "calib/mean_conf": 0.03832926829268292, "calib/mu_c": 0.0357037037037037, "calib/mu_w": 0.0403840579710145, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005284552845528456, "calib/std_conf": 0.07393686411442102, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2802.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 435.5234375, "completions/mean_terminated_length": 442.4365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.06826666666666667, "grad_norm": 0.084785595536232, "learning_rate": 3.777777777777778e-06, "loss": 0.0216, "num_tokens": 13815346.0, "reward": 1.0429743528366089, "reward_std": 0.33013078570365906, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5625226497650146, "rewards/format_reward_step": 0.9609375, "step": 64 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.2754387730179495e-06, "aux_brier/mean_r": 0.9693706203252077, "aux_brier/n_active_tok": 177.625, "aux_brier/n_step_records": 44.40625, "aux_brier/std_r": 0.06981034247201023, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5354100445009535, "calib/avg_num_step_conf": 5.56640625, "calib/ece": 0.41743873517786556, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005405594405594405, "calib/mean_conf": 0.030782608695652174, "calib/mu_c": 0.027727272727272725, "calib/mu_w": 0.03313286713286713, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006719367588932806, "calib/std_conf": 0.0697802605099112, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 369.671875, "completions/mean_terminated_length": 369.671875, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.06933333333333333, "grad_norm": 0.028543710708618164, "learning_rate": 3.7500000000000005e-06, "loss": 0.0658, "num_tokens": 14015006.0, "reward": 1.0593637228012085, "reward_std": 0.24863088130950928, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5655798316001892, "rewards/format_reward_step": 0.9765625, "step": 65 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.1284482016326045e-06, "aux_brier/mean_r": 0.9776174630969763, "aux_brier/n_active_tok": 205.875, "aux_brier/n_step_records": 51.46875, "aux_brier/std_r": 0.06463263387195184, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4761224489795918, "calib/avg_num_step_conf": 6.453125, "calib/ece": 0.36782258064516127, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.010451700680272118, "calib/mean_conf": 0.03540322580645162, "calib/mu_c": 0.02908163265306122, "calib/mu_w": 0.03953333333333334, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004032258064516129, "calib/std_conf": 0.07831799522900817, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 463.76171875, "completions/mean_terminated_length": 465.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0704, "grad_norm": 0.3943735957145691, "learning_rate": 3.7222222222222225e-06, "loss": 0.0598, "num_tokens": 14240081.0, "reward": 1.0145626068115234, "reward_std": 0.2606889605522156, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.5973129272460938, "rewards/format_reward_step": 0.96484375, "step": 66 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.8116742162298536e-06, "aux_brier/mean_r": 0.96447335742414, "aux_brier/n_active_tok": 174.75, "aux_brier/n_step_records": 43.6875, "aux_brier/std_r": 0.08723837620618724, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4643442622950819, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.4696746031746032, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.015695081967213104, "calib/mean_conf": 0.030801587301587302, "calib/mu_c": 0.022704918032786885, "calib/mu_w": 0.03839999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008174603174603175, "calib/std_conf": 0.07869218914480082, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2463.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 422.4375, "completions/mean_terminated_length": 424.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.07146666666666666, "grad_norm": 0.1314437985420227, "learning_rate": 3.694444444444445e-06, "loss": 0.0244, "num_tokens": 14453233.0, "reward": 1.0993558168411255, "reward_std": 0.247754767537117, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.522423505783081, "rewards/format_reward_step": 0.984375, "step": 67 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.061379815318734e-06, "aux_brier/mean_r": 0.980652455240488, "aux_brier/n_active_tok": 191.0, "aux_brier/n_step_records": 47.75, "aux_brier/std_r": 0.04698441309847112, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.44688311688311694, "calib/avg_num_step_conf": 6.03125, "calib/ece": 0.42776, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.012435064935064936, "calib/mean_conf": 0.0216, "calib/mu_c": 0.014636363636363637, "calib/mu_w": 0.027071428571428573, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00468, "calib/std_conf": 0.06551824173464975, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 428.94921875, "completions/mean_terminated_length": 428.94921875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.07253333333333334, "grad_norm": 0.13681000471115112, "learning_rate": 3.6666666666666666e-06, "loss": 0.1456, "num_tokens": 14667132.0, "reward": 1.0546975135803223, "reward_std": 0.2470514327287674, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5547277331352234, "rewards/format_reward_step": 0.97265625, "step": 68 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.1646586658985445e-06, "aux_brier/mean_r": 0.9609247855842113, "aux_brier/n_active_tok": 178.125, "aux_brier/n_step_records": 44.53125, "aux_brier/std_r": 0.09590224422208848, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.49823047998230474, "calib/avg_num_step_conf": 5.7421875, "calib/ece": 0.40915254237288134, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008397847083978466, "calib/mean_conf": 0.02144067796610169, "calib/mu_c": 0.016565656565656568, "calib/mu_w": 0.024963503649635035, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005550847457627118, "calib/std_conf": 0.05749392633697977, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 490.15625, "completions/mean_terminated_length": 503.93572998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0736, "grad_norm": 0.20296375453472137, "learning_rate": 3.638888888888889e-06, "loss": 0.0887, "num_tokens": 14897108.0, "reward": 0.9813294410705566, "reward_std": 0.3530961871147156, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.5425054430961609, "rewards/format_reward_step": 0.91796875, "step": 69 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.3152611430755634e-05, "aux_brier/mean_r": 0.9708932712674141, "aux_brier/n_active_tok": 195.25, "aux_brier/n_step_records": 48.8125, "aux_brier/std_r": 0.0706544549825594, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5127907769970667, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.42109180327868856, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014895422607271967, "calib/mean_conf": 0.019154098360655736, "calib/mu_c": 0.018317757009345795, "calib/mu_w": 0.019807299270072992, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008606557377049181, "calib/std_conf": 0.02696463727794811, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 474.51171875, "completions/mean_terminated_length": 478.2480163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.07466666666666667, "grad_norm": 0.015685321763157845, "learning_rate": 3.6111111111111115e-06, "loss": 0.0665, "num_tokens": 15125575.0, "reward": 1.0289678573608398, "reward_std": 0.2912682294845581, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5455588698387146, "rewards/format_reward_step": 0.94921875, "step": 70 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.660947077627208e-06, "aux_brier/mean_r": 0.9693093933165073, "aux_brier/n_active_tok": 217.5, "aux_brier/n_step_records": 54.375, "aux_brier/std_r": 0.06705048960066051, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5066202566202567, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.38995951417004054, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.010331695331695333, "calib/mean_conf": 0.024372469635627527, "calib/mu_c": 0.018181818181818184, "calib/mu_w": 0.028513513513513517, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006761133603238867, "calib/std_conf": 0.07421049318498833, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 486.51953125, "completions/mean_terminated_length": 490.35040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.07573333333333333, "grad_norm": 0.031791944056749344, "learning_rate": 3.5833333333333335e-06, "loss": 0.0882, "num_tokens": 15354532.0, "reward": 1.0098564624786377, "reward_std": 0.3248251974582672, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.5784882307052612, "rewards/format_reward_step": 0.95703125, "step": 71 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.385538210944674e-07, "aux_brier/mean_r": 0.9742661193013191, "aux_brier/n_active_tok": 200.625, "aux_brier/n_step_records": 50.15625, "aux_brier/std_r": 0.0674651956433081, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4851190476190476, "calib/avg_num_step_conf": 6.31640625, "calib/ece": 0.4344763779527559, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0055828320802005, "calib/mean_conf": 0.018515748031496068, "calib/mu_c": 0.015438596491228073, "calib/mu_w": 0.021021428571428573, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0020866141732283467, "calib/std_conf": 0.040990177285409095, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1937.0, "completions/max_terminated_length": 1937.0, "completions/mean_length": 417.8984375, "completions/mean_terminated_length": 417.8984375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.0768, "grad_norm": 0.055013444274663925, "learning_rate": 3.555555555555556e-06, "loss": 0.0454, "num_tokens": 15565922.0, "reward": 1.0758342742919922, "reward_std": 0.25904011726379395, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5533369183540344, "rewards/format_reward_step": 0.984375, "step": 72 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.074364908360217e-06, "aux_brier/mean_r": 0.9756907764822245, "aux_brier/n_active_tok": 203.25, "aux_brier/n_step_records": 50.8125, "aux_brier/std_r": 0.06860575730752316, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5050731707317073, "calib/avg_num_step_conf": 6.37890625, "calib/ece": 0.485866935483871, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004139837398373959, "calib/mean_conf": 0.018165322580645165, "calib/mu_c": 0.01796, "calib/mu_w": 0.018373983739837396, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02829782718307253, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2450.0, "completions/max_terminated_length": 2450.0, "completions/mean_length": 467.484375, "completions/mean_terminated_length": 467.484375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.07786666666666667, "grad_norm": 0.01597970351576805, "learning_rate": 3.5277777777777784e-06, "loss": 0.0943, "num_tokens": 15792630.0, "reward": 1.0968843698501587, "reward_std": 0.2916446328163147, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.49691241979599, "rewards/format_reward_step": 0.96875, "step": 73 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9740423112257766e-06, "aux_brier/mean_r": 0.9756334722042084, "aux_brier/n_active_tok": 204.125, "aux_brier/n_step_records": 51.03125, "aux_brier/std_r": 0.0775755551783277, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48560000000000003, "calib/avg_num_step_conf": 6.3984375, "calib/ece": 0.38628, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004733333333333329, "calib/mean_conf": 0.021240000000000002, "calib/mu_c": 0.018400000000000003, "calib/mu_w": 0.023133333333333332, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00376, "calib/std_conf": 0.053736974235622896, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2635.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 428.98046875, "completions/mean_terminated_length": 434.06719970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.07893333333333333, "grad_norm": 0.038290224969387054, "learning_rate": 3.5e-06, "loss": 0.0614, "num_tokens": 16006377.0, "reward": 1.0281691551208496, "reward_std": 0.27312523126602173, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.5970519185066223, "rewards/format_reward_step": 0.9765625, "step": 74 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.118100940658011e-06, "aux_brier/mean_r": 0.9475213903933764, "aux_brier/n_active_tok": 199.75, "aux_brier/n_step_records": 49.9375, "aux_brier/std_r": 0.11736969361845695, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4985180590019299, "calib/avg_num_step_conf": 6.26171875, "calib/ece": 0.6072289156626507, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0026985111662531022, "calib/mean_conf": 0.01927710843373494, "calib/mu_c": 0.01826923076923077, "calib/mu_w": 0.020967741935483872, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04188672233661531, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2719.0, "completions/max_terminated_length": 2719.0, "completions/mean_length": 417.76171875, "completions/mean_terminated_length": 417.76171875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.08, "grad_norm": 0.015792354941368103, "learning_rate": 3.4722222222222224e-06, "loss": 0.048, "num_tokens": 16218076.0, "reward": 1.191572904586792, "reward_std": 0.27867376804351807, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.38347890973091125, "rewards/format_reward_step": 0.97265625, "step": 75 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.665835495520909e-06, "aux_brier/mean_r": 0.9634039793163538, "aux_brier/n_active_tok": 205.875, "aux_brier/n_step_records": 51.46875, "aux_brier/std_r": 0.08791857168216666, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4433262711864407, "calib/avg_num_step_conf": 6.43359375, "calib/ece": 0.4641056910569105, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008158103813559319, "calib/mean_conf": 0.018821138211382118, "calib/mu_c": 0.014576271186440682, "calib/mu_w": 0.022734375, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0016260162601626018, "calib/std_conf": 0.03622188453172582, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 470.32421875, "completions/mean_terminated_length": 474.0275573730469, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.08106666666666666, "grad_norm": 0.13059166073799133, "learning_rate": 3.444444444444445e-06, "loss": 0.13, "num_tokens": 16441535.0, "reward": 1.0665919780731201, "reward_std": 0.2876865863800049, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5085550546646118, "rewards/format_reward_step": 0.95703125, "step": 76 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.768571664484858e-06, "aux_brier/mean_r": 0.9557140190154314, "aux_brier/n_active_tok": 210.0, "aux_brier/n_step_records": 52.5, "aux_brier/std_r": 0.09832711402032146, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.47872410175714664, "calib/avg_num_step_conf": 6.73046875, "calib/ece": 0.48611336032388663, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005589430894308949, "calib/mean_conf": 0.0202834008097166, "calib/mu_c": 0.017499999999999995, "calib/mu_w": 0.023089430894308944, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0021862348178137655, "calib/std_conf": 0.039309843723114794, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 438.40625, "completions/mean_terminated_length": 443.6047668457031, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.08213333333333334, "grad_norm": 0.21531686186790466, "learning_rate": 3.416666666666667e-06, "loss": 0.0249, "num_tokens": 16658431.0, "reward": 1.0906803607940674, "reward_std": 0.3182448744773865, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.49553394317626953, "rewards/format_reward_step": 0.96484375, "step": 77 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.6879490613862522e-06, "aux_brier/mean_r": 0.9649478234350681, "aux_brier/n_active_tok": 202.875, "aux_brier/n_step_records": 50.71875, "aux_brier/std_r": 0.09124442109714437, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4816045807555614, "calib/avg_num_step_conf": 6.33984375, "calib/ece": 0.412289156626506, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001884296432802423, "calib/mean_conf": 0.017429718875502008, "calib/mu_c": 0.01635514018691589, "calib/mu_w": 0.018239436619718313, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02097570023438015, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 474.3203125, "completions/mean_terminated_length": 479.9446716308594, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.0832, "grad_norm": 0.04863213747739792, "learning_rate": 3.3888888888888893e-06, "loss": 0.0253, "num_tokens": 16887881.0, "reward": 1.0462058782577515, "reward_std": 0.2661869525909424, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5676359534263611, "rewards/format_reward_step": 0.97265625, "step": 78 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.1245986737734626e-06, "aux_brier/mean_r": 0.9615942277014256, "aux_brier/n_active_tok": 215.0, "aux_brier/n_step_records": 53.75, "aux_brier/std_r": 0.07969562334346847, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5287981859410431, "calib/avg_num_step_conf": 6.734375, "calib/ece": 0.568452380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.007768707482993201, "calib/mean_conf": 0.02670634920634921, "calib/mu_c": 0.02346938775510204, "calib/mu_w": 0.031238095238095242, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005912698412698413, "calib/std_conf": 0.07395747034617287, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2119.0, "completions/max_terminated_length": 2119.0, "completions/mean_length": 454.57421875, "completions/mean_terminated_length": 458.1535339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.08426666666666667, "grad_norm": 0.0486554317176342, "learning_rate": 3.3611111111111117e-06, "loss": 0.0315, "num_tokens": 17110628.0, "reward": 1.1722087860107422, "reward_std": 0.2507716715335846, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.4310230612754822, "rewards/format_reward_step": 0.98046875, "step": 79 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.122955576458474e-06, "aux_brier/mean_r": 0.9465570580214262, "aux_brier/n_active_tok": 220.625, "aux_brier/n_step_records": 55.15625, "aux_brier/std_r": 0.11152001099799236, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5113501776549546, "calib/avg_num_step_conf": 6.9296875, "calib/ece": 0.5724541832669323, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008170811948940704, "calib/mean_conf": 0.021171314741035858, "calib/mu_c": 0.02150335570469799, "calib/mu_w": 0.02068627450980392, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023020139585986626, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 429.8125, "completions/mean_terminated_length": 433.19683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.08533333333333333, "grad_norm": 0.1881030946969986, "learning_rate": 3.3333333333333333e-06, "loss": 0.0251, "num_tokens": 17322820.0, "reward": 1.173011064529419, "reward_std": 0.2777586281299591, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4186069667339325, "rewards/format_reward_step": 0.97265625, "step": 80 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.5029714336443085e-06, "aux_brier/mean_r": 0.9561555609107018, "aux_brier/n_active_tok": 224.25, "aux_brier/n_step_records": 56.0625, "aux_brier/std_r": 0.1031700312596513, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4685198946159562, "calib/avg_num_step_conf": 7.140625, "calib/ece": 0.5130737704918032, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0039444707153955295, "calib/mean_conf": 0.023811475409836066, "calib/mu_c": 0.02198473282442748, "calib/mu_w": 0.02592920353982301, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022557333861472195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2848.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 472.19140625, "completions/mean_terminated_length": 479.6865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.0864, "grad_norm": 0.4952904284000397, "learning_rate": 3.3055555555555558e-06, "loss": 0.0761, "num_tokens": 17549949.0, "reward": 1.104001522064209, "reward_std": 0.2605801224708557, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.46288084983825684, "rewards/format_reward_step": 0.953125, "step": 81 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9720755916718957e-05, "aux_brier/mean_r": 0.9714564047753811, "aux_brier/n_active_tok": 191.125, "aux_brier/n_step_records": 47.78125, "aux_brier/std_r": 0.06360979704913916, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5121765096217651, "calib/avg_num_step_conf": 6.1015625, "calib/ece": 0.5317813765182187, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006751824817518327, "calib/mean_conf": 0.022874493927125508, "calib/mu_c": 0.023175182481751825, "calib/mu_w": 0.022499999999999992, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.021027176295511332, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 428.13671875, "completions/mean_terminated_length": 431.50787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.08746666666666666, "grad_norm": 0.15230073034763336, "learning_rate": 3.277777777777778e-06, "loss": 0.0657, "num_tokens": 17765104.0, "reward": 1.1348745822906494, "reward_std": 0.283991277217865, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4535607695579529, "rewards/format_reward_step": 0.96484375, "step": 82 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.247837684585252e-06, "aux_brier/mean_r": 0.9476204346865416, "aux_brier/n_active_tok": 225.875, "aux_brier/n_step_records": 56.46875, "aux_brier/std_r": 0.10184100706737809, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.47741402116402115, "calib/avg_num_step_conf": 7.34765625, "calib/ece": 0.4845528455284553, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003908730158730167, "calib/mean_conf": 0.030081300813008128, "calib/mu_c": 0.028174603174603165, "calib/mu_w": 0.03208333333333333, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012195121951219512, "calib/std_conf": 0.03064339701178227, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2680.0, "completions/max_terminated_length": 2680.0, "completions/mean_length": 466.0859375, "completions/mean_terminated_length": 475.37054443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.08853333333333334, "grad_norm": 0.031010059639811516, "learning_rate": 3.2500000000000002e-06, "loss": 0.0327, "num_tokens": 17991686.0, "reward": 1.093408226966858, "reward_std": 0.2682487368583679, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.4908202886581421, "rewards/format_reward_step": 0.95703125, "step": 83 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.938589001223797e-07, "aux_brier/mean_r": 0.9530369993299246, "aux_brier/n_active_tok": 200.25, "aux_brier/n_step_records": 50.0625, "aux_brier/std_r": 0.09934906920716458, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.45075270200720535, "calib/avg_num_step_conf": 6.2890625, "calib/ece": 0.501596, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.004099073597529604, "calib/mean_conf": 0.034404000000000004, "calib/mu_c": 0.03630597014925374, "calib/mu_w": 0.03220689655172414, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06647568566024724, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 435.32421875, "completions/mean_terminated_length": 437.0314025878906, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0896, "grad_norm": 0.20255745947360992, "learning_rate": 3.2222222222222227e-06, "loss": 0.0305, "num_tokens": 18209049.0, "reward": 1.133134126663208, "reward_std": 0.23849549889564514, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.48566144704818726, "rewards/format_reward_step": 0.9765625, "step": 84 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.3112116958591269e-06, "aux_brier/mean_r": 0.9549486991018057, "aux_brier/n_active_tok": 202.375, "aux_brier/n_step_records": 50.59375, "aux_brier/std_r": 0.0979152055697341, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5352040489461208, "calib/avg_num_step_conf": 6.359375, "calib/ece": 0.4508999999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003602088538663596, "calib/mean_conf": 0.033100000000000004, "calib/mu_c": 0.034958677685950415, "calib/mu_w": 0.03135658914728682, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025816467612746714, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2836.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 479.80859375, "completions/mean_terminated_length": 481.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.09066666666666667, "grad_norm": 0.20410680770874023, "learning_rate": 3.1944444444444443e-06, "loss": 0.07, "num_tokens": 18439704.0, "reward": 1.0888876914978027, "reward_std": 0.2575167715549469, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5274260640144348, "rewards/format_reward_step": 0.96875, "step": 85 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.420407020173545e-07, "aux_brier/mean_r": 0.9533153977245092, "aux_brier/n_active_tok": 201.75, "aux_brier/n_step_records": 50.4375, "aux_brier/std_r": 0.09625982833676971, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5537828426717315, "calib/avg_num_step_conf": 6.37109375, "calib/ece": 0.4328730158730159, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0016501424501424514, "calib/mean_conf": 0.03784126984126985, "calib/mu_c": 0.03695726495726495, "calib/mu_w": 0.0386074074074074, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0032142857142857142, "calib/std_conf": 0.05499847222963427, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2693.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 435.80078125, "completions/mean_terminated_length": 440.9683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.09173333333333333, "grad_norm": 0.19507144391536713, "learning_rate": 3.1666666666666667e-06, "loss": 0.0046, "num_tokens": 18656781.0, "reward": 1.0854783058166504, "reward_std": 0.24163693189620972, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5528507232666016, "rewards/format_reward_step": 0.98046875, "step": 86 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.30398129769705e-06, "aux_brier/mean_r": 0.9406522922217846, "aux_brier/n_active_tok": 178.5, "aux_brier/n_step_records": 44.625, "aux_brier/std_r": 0.11897473732096842, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5404861111111111, "calib/avg_num_step_conf": 5.62890625, "calib/ece": 0.6040599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038125000000000034, "calib/mean_conf": 0.03594000000000001, "calib/mu_c": 0.0373125, "calib/mu_w": 0.033499999999999995, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024446193977795398, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 408.0859375, "completions/mean_terminated_length": 411.2992248535156, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.0928, "grad_norm": 0.35734379291534424, "learning_rate": 3.138888888888889e-06, "loss": 0.0698, "num_tokens": 18866747.0, "reward": 1.216277003288269, "reward_std": 0.2729073166847229, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.3963581323623657, "rewards/format_reward_step": 0.9765625, "step": 87 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.586885609306977e-06, "aux_brier/mean_r": 0.958948154002428, "aux_brier/n_active_tok": 186.5, "aux_brier/n_step_records": 46.625, "aux_brier/std_r": 0.09168809103903186, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5831846565566458, "calib/avg_num_step_conf": 5.9375, "calib/ece": 0.426390438247012, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009595514209251955, "calib/mean_conf": 0.04372908366533864, "calib/mu_c": 0.048813559322033906, "calib/mu_w": 0.03921804511278195, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.027815955873425644, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2427.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 440.56640625, "completions/mean_terminated_length": 444.0354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.09386666666666667, "grad_norm": 0.03470201417803764, "learning_rate": 3.1111111111111116e-06, "loss": 0.0571, "num_tokens": 19089380.0, "reward": 1.0857967138290405, "reward_std": 0.2128307819366455, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5541242957115173, "rewards/format_reward_step": 0.97265625, "step": 88 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.0660022709512744e-07, "aux_brier/mean_r": 0.9549915064126253, "aux_brier/n_active_tok": 195.25, "aux_brier/n_step_records": 48.8125, "aux_brier/std_r": 0.10538984932281892, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48639942734430924, "calib/avg_num_step_conf": 6.4921875, "calib/ece": 0.4424931451612903, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00365990108674432, "calib/mean_conf": 0.04702298387096775, "calib/mu_c": 0.04514876033057852, "calib/mu_w": 0.04880866141732284, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0008064516129032258, "calib/std_conf": 0.02516101046357493, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2154.0, "completions/max_terminated_length": 2154.0, "completions/mean_length": 447.8359375, "completions/mean_terminated_length": 451.3622131347656, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.09493333333333333, "grad_norm": 0.03613610938191414, "learning_rate": 3.0833333333333336e-06, "loss": 0.0916, "num_tokens": 19312914.0, "reward": 1.0920158624649048, "reward_std": 0.27613985538482666, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5321258902549744, "rewards/format_reward_step": 0.96484375, "step": 89 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1211321207030278e-06, "aux_brier/mean_r": 0.9305798541754484, "aux_brier/n_active_tok": 225.5, "aux_brier/n_step_records": 56.375, "aux_brier/std_r": 0.12866863430645026, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5555231805231805, "calib/avg_num_step_conf": 7.27734375, "calib/ece": 0.48307630522088363, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001032634032634025, "calib/mean_conf": 0.051060240963855426, "calib/mu_c": 0.05154545454545454, "calib/mu_w": 0.050512820512820515, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002008032128514056, "calib/std_conf": 0.03211526031847574, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2640.0, "completions/max_terminated_length": 2640.0, "completions/mean_length": 446.48828125, "completions/mean_terminated_length": 450.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.096, "grad_norm": 0.04400340095162392, "learning_rate": 3.055555555555556e-06, "loss": 0.0506, "num_tokens": 19530535.0, "reward": 1.1325215101242065, "reward_std": 0.23195365071296692, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5066484808921814, "rewards/format_reward_step": 0.97265625, "step": 90 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.524275157789191e-07, "aux_brier/mean_r": 0.9710229635238647, "aux_brier/n_active_tok": 211.875, "aux_brier/n_step_records": 52.96875, "aux_brier/std_r": 0.07653339493845124, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5369927629878521, "calib/avg_num_step_conf": 6.75, "calib/ece": 0.5215674603174604, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021583096407340255, "calib/mean_conf": 0.057797619047619056, "calib/mu_c": 0.05870547945205478, "calib/mu_w": 0.056547169811320755, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02801889091843786, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2377.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 455.1796875, "completions/mean_terminated_length": 456.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.09706666666666666, "grad_norm": 0.10957203805446625, "learning_rate": 3.0277777777777776e-06, "loss": 0.0056, "num_tokens": 19754773.0, "reward": 1.1797873973846436, "reward_std": 0.24657493829727173, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.4769623279571533, "rewards/format_reward_step": 0.98046875, "step": 91 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0264232747081437e-06, "aux_brier/mean_r": 0.9800701979547739, "aux_brier/n_active_tok": 195.625, "aux_brier/n_step_records": 48.90625, "aux_brier/std_r": 0.05496317755387281, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4971723011185119, "calib/avg_num_step_conf": 6.15625, "calib/ece": 0.5177254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.796280005027546e-05, "calib/mean_conf": 0.05639215686274511, "calib/mu_c": 0.056404109589041095, "calib/mu_w": 0.05637614678899082, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007843137254901962, "calib/std_conf": 0.031105814620277675, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 403.2578125, "completions/mean_terminated_length": 403.2578125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.09813333333333334, "grad_norm": 0.0068860137835145, "learning_rate": 3e-06, "loss": 0.0433, "num_tokens": 19964727.0, "reward": 1.1898558139801025, "reward_std": 0.22652819752693176, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.48598572611808777, "rewards/format_reward_step": 0.99609375, "step": 92 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.3416964775878633e-06, "aux_brier/mean_r": 0.9667877722531557, "aux_brier/n_active_tok": 223.5, "aux_brier/n_step_records": 55.875, "aux_brier/std_r": 0.08430354389565764, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.43957713293650796, "calib/avg_num_step_conf": 7.0703125, "calib/ece": 0.42933070866141726, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011117311507936503, "calib/mean_conf": 0.0690944881889764, "calib/mu_c": 0.0634920634920635, "calib/mu_w": 0.074609375, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0011811023622047244, "calib/std_conf": 0.04065854222957474, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 446.9296875, "completions/mean_terminated_length": 448.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.0992, "grad_norm": 0.024433419108390808, "learning_rate": 2.9722222222222225e-06, "loss": -0.0094, "num_tokens": 20184917.0, "reward": 1.1273119449615479, "reward_std": 0.2513663172721863, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5561230182647705, "rewards/format_reward_step": 0.9921875, "step": 93 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.637672211343924e-06, "aux_brier/mean_r": 0.9576106257736683, "aux_brier/n_active_tok": 188.5, "aux_brier/n_step_records": 47.125, "aux_brier/std_r": 0.08591984666600183, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5248251748251748, "calib/avg_num_step_conf": 5.89453125, "calib/ece": 0.4592231075697212, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014259376986649475, "calib/mean_conf": 0.06069721115537849, "calib/mu_c": 0.06138461538461538, "calib/mu_w": 0.05995867768595043, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00099601593625498, "calib/std_conf": 0.02886311891768817, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2821.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 418.40625, "completions/mean_terminated_length": 421.7007751464844, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.10026666666666667, "grad_norm": 0.027087682858109474, "learning_rate": 2.944444444444445e-06, "loss": 0.0689, "num_tokens": 20400709.0, "reward": 1.1306896209716797, "reward_std": 0.1795433759689331, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5305709838867188, "rewards/format_reward_step": 0.98046875, "step": 94 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1341438536104373e-06, "aux_brier/mean_r": 0.9487221036106348, "aux_brier/n_active_tok": 201.75, "aux_brier/n_step_records": 50.4375, "aux_brier/std_r": 0.12240388198188157, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5189329863370202, "calib/avg_num_step_conf": 6.36328125, "calib/ece": 0.5137051792828684, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009847104749511892, "calib/mean_conf": 0.06844621513944223, "calib/mu_c": 0.06886206896551723, "calib/mu_w": 0.06787735849056604, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0022310756972111555, "calib/std_conf": 0.03630416129191106, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2231.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 434.8671875, "completions/mean_terminated_length": 436.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.10133333333333333, "grad_norm": 0.07396937161684036, "learning_rate": 2.916666666666667e-06, "loss": 0.0509, "num_tokens": 20618163.0, "reward": 1.1752595901489258, "reward_std": 0.28123950958251953, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.4822881817817688, "rewards/format_reward_step": 0.9765625, "step": 95 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.7296195456716354e-08, "aux_brier/mean_r": 0.9515257924795151, "aux_brier/n_active_tok": 198.5, "aux_brier/n_step_records": 49.625, "aux_brier/std_r": 0.09161369614230352, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5040392785012884, "calib/avg_num_step_conf": 6.30078125, "calib/ece": 0.6062304687500001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011181140747962975, "calib/mean_conf": 0.06955078125, "calib/mu_c": 0.06991329479768786, "calib/mu_w": 0.06879518072289156, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.035771264427814795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 394.8515625, "completions/mean_terminated_length": 396.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.1024, "grad_norm": 0.1715133637189865, "learning_rate": 2.888888888888889e-06, "loss": -0.022, "num_tokens": 20825061.0, "reward": 1.2789297103881836, "reward_std": 0.2132999300956726, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.4125940203666687, "rewards/format_reward_step": 1.0, "step": 96 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.5436490251329325e-06, "aux_brier/mean_r": 0.9722522143274546, "aux_brier/n_active_tok": 203.75, "aux_brier/n_step_records": 50.9375, "aux_brier/std_r": 0.07436182858145912, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4908001711596063, "calib/avg_num_step_conf": 6.48046875, "calib/ece": 0.4502734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00048352588789044026, "calib/mean_conf": 0.07082031250000001, "calib/mu_c": 0.07105263157894737, "calib/mu_w": 0.07056910569105693, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00078125, "calib/std_conf": 0.03331477093426194, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 993.0, "completions/max_terminated_length": 993.0, "completions/mean_length": 395.16015625, "completions/mean_terminated_length": 396.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.10346666666666667, "grad_norm": 0.07863761484622955, "learning_rate": 2.861111111111111e-06, "loss": 0.049, "num_tokens": 21031294.0, "reward": 1.1536492109298706, "reward_std": 0.2430051863193512, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5442843437194824, "rewards/format_reward_step": 0.99609375, "step": 97 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.8480245942152926e-07, "aux_brier/mean_r": 0.941277053207159, "aux_brier/n_active_tok": 204.25, "aux_brier/n_step_records": 51.0625, "aux_brier/std_r": 0.11857475936994888, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5042275905912269, "calib/avg_num_step_conf": 6.55078125, "calib/ece": 0.45173625498007963, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0036596312778131163, "calib/mean_conf": 0.07161035856573705, "calib/mu_c": 0.06984615384615384, "calib/mu_w": 0.07350578512396695, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0027091633466135457, "calib/std_conf": 0.03385360638999163, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 447.6796875, "completions/mean_terminated_length": 449.4353332519531, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.10453333333333334, "grad_norm": 0.026394343003630638, "learning_rate": 2.8333333333333335e-06, "loss": 0.0615, "num_tokens": 21252084.0, "reward": 1.1324074268341064, "reward_std": 0.2503637373447418, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5374422073364258, "rewards/format_reward_step": 0.98046875, "step": 98 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.126785942054623e-06, "aux_brier/mean_r": 0.9733042698353529, "aux_brier/n_active_tok": 218.125, "aux_brier/n_step_records": 54.53125, "aux_brier/std_r": 0.07072518536369898, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.46119114910043424, "calib/avg_num_step_conf": 6.90625, "calib/ece": 0.27119047619047615, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005099882815192663, "calib/mean_conf": 0.08198412698412698, "calib/mu_c": 0.07868539325842697, "calib/mu_w": 0.08378527607361963, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03978144341412035, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1775.0, "completions/max_terminated_length": 1775.0, "completions/mean_length": 475.3203125, "completions/mean_terminated_length": 480.95654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1056, "grad_norm": 0.08703938871622086, "learning_rate": 2.805555555555556e-06, "loss": -0.0333, "num_tokens": 21479566.0, "reward": 1.010657548904419, "reward_std": 0.24851438403129578, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6832554936408997, "rewards/format_reward_step": 0.984375, "step": 99 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.474406214016184e-07, "aux_brier/mean_r": 0.9467994384467602, "aux_brier/n_active_tok": 200.25, "aux_brier/n_step_records": 50.0625, "aux_brier/std_r": 0.09086217200092506, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5883014623172104, "calib/avg_num_step_conf": 6.31640625, "calib/ece": 0.42170355731225295, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012717597800274955, "calib/mean_conf": 0.0822490118577075, "calib/mu_c": 0.08858267716535434, "calib/mu_w": 0.07586507936507939, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0009881422924901185, "calib/std_conf": 0.046348964720305544, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2220.0, "completions/max_terminated_length": 2220.0, "completions/mean_length": 427.08984375, "completions/mean_terminated_length": 428.7647399902344, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.10666666666666667, "grad_norm": 0.23760278522968292, "learning_rate": 2.7777777777777783e-06, "loss": 0.0292, "num_tokens": 21696309.0, "reward": 1.1313462257385254, "reward_std": 0.3002142310142517, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5644475817680359, "rewards/format_reward_step": 0.98046875, "step": 100 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2005507928902759e-06, "aux_brier/mean_r": 0.9751377236098051, "aux_brier/n_active_tok": 224.625, "aux_brier/n_step_records": 56.15625, "aux_brier/std_r": 0.06352466254975297, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6312695674397801, "calib/avg_num_step_conf": 7.25, "calib/ece": 0.3567539682539684, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015325666091623544, "calib/mean_conf": 0.08372222222222221, "calib/mu_c": 0.09229729729729728, "calib/mu_w": 0.07697163120567374, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04128407950358775, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1664.0, "completions/max_terminated_length": 1664.0, "completions/mean_length": 455.83203125, "completions/mean_terminated_length": 459.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.10773333333333333, "grad_norm": 0.1534683108329773, "learning_rate": 2.7500000000000004e-06, "loss": 0.0202, "num_tokens": 21919994.0, "reward": 1.0813418626785278, "reward_std": 0.2274094820022583, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6222426891326904, "rewards/format_reward_step": 0.984375, "step": 101 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.4301347423261177e-06, "aux_brier/mean_r": 0.960718834772706, "aux_brier/n_active_tok": 195.75, "aux_brier/n_step_records": 48.9375, "aux_brier/std_r": 0.0864135812225868, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47873786407766994, "calib/avg_num_step_conf": 6.30078125, "calib/ece": 0.511999209486166, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003679961165048512, "calib/mean_conf": 0.08088616600790514, "calib/mu_c": 0.079388, "calib/mu_w": 0.08306796116504851, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03451517175602543, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2431.0, "completions/max_terminated_length": 2431.0, "completions/mean_length": 391.41015625, "completions/mean_terminated_length": 394.49212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.1088, "grad_norm": 0.2126937061548233, "learning_rate": 2.7222222222222224e-06, "loss": -0.0243, "num_tokens": 22126891.0, "reward": 1.1990914344787598, "reward_std": 0.19276341795921326, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.4838661551475525, "rewards/format_reward_step": 0.984375, "step": 102 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.9599245464907433e-06, "aux_brier/mean_r": 0.9712262339890003, "aux_brier/n_active_tok": 204.75, "aux_brier/n_step_records": 51.1875, "aux_brier/std_r": 0.07889097868610406, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.41545345686014995, "calib/avg_num_step_conf": 6.67578125, "calib/ece": 0.42543650793650795, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009837398373983758, "calib/mean_conf": 0.08646825396825399, "calib/mu_c": 0.08166666666666667, "calib/mu_w": 0.09150406504065042, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03863785595532429, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 471.1796875, "completions/mean_terminated_length": 476.7668151855469, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.10986666666666667, "grad_norm": 0.20270833373069763, "learning_rate": 2.6944444444444444e-06, "loss": 0.0215, "num_tokens": 22352065.0, "reward": 1.1316890716552734, "reward_std": 0.17910169064998627, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5501939654350281, "rewards/format_reward_step": 0.98046875, "step": 103 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.972208633244549e-07, "aux_brier/mean_r": 0.9698248207569122, "aux_brier/n_active_tok": 205.625, "aux_brier/n_step_records": 51.40625, "aux_brier/std_r": 0.06784799284650944, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5757894066717596, "calib/avg_num_step_conf": 6.49609375, "calib/ece": 0.31007421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006632161955691385, "calib/mean_conf": 0.08836328125000001, "calib/mu_c": 0.09235294117647061, "calib/mu_w": 0.08572077922077923, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.033913678036795104, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1112.0, "completions/max_terminated_length": 1112.0, "completions/mean_length": 401.39453125, "completions/mean_terminated_length": 402.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.11093333333333333, "grad_norm": 0.2142917662858963, "learning_rate": 2.666666666666667e-06, "loss": 0.043, "num_tokens": 22561502.0, "reward": 1.0649869441986084, "reward_std": 0.1767987608909607, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6661980152130127, "rewards/format_reward_step": 1.0, "step": 104 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2800965493164895e-06, "aux_brier/mean_r": 0.9729265086352825, "aux_brier/n_active_tok": 216.25, "aux_brier/n_step_records": 54.0625, "aux_brier/std_r": 0.06955023356567835, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44589785214785216, "calib/avg_num_step_conf": 6.78515625, "calib/ece": 0.3487843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008751873126873116, "calib/mean_conf": 0.09356862745098041, "calib/mu_c": 0.0886607142857143, "calib/mu_w": 0.09741258741258742, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.04703993084810543, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2305.0, "completions/max_terminated_length": 2305.0, "completions/mean_length": 453.078125, "completions/mean_terminated_length": 453.078125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.112, "grad_norm": 0.29024821519851685, "learning_rate": 2.6388888888888893e-06, "loss": 0.0712, "num_tokens": 22783250.0, "reward": 1.0889368057250977, "reward_std": 0.2602972984313965, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6213722825050354, "rewards/format_reward_step": 0.9921875, "step": 105 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.263094973451121e-07, "aux_brier/mean_r": 0.9607481062412262, "aux_brier/n_active_tok": 196.0, "aux_brier/n_step_records": 49.0, "aux_brier/std_r": 0.0893817414471414, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.638063909774436, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.3584448818897637, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01055513784461154, "calib/mean_conf": 0.09659448818897638, "calib/mu_c": 0.1024122807017544, "calib/mu_w": 0.09185714285714286, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003110236220472441, "calib/std_conf": 0.03486358298337732, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2765.0, "completions/max_terminated_length": 2765.0, "completions/mean_length": 419.21875, "completions/mean_terminated_length": 419.21875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.11306666666666666, "grad_norm": 0.4787517786026001, "learning_rate": 2.6111111111111113e-06, "loss": 0.0234, "num_tokens": 22995154.0, "reward": 1.0962450504302979, "reward_std": 0.23281504213809967, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6271676421165466, "rewards/format_reward_step": 0.98828125, "step": 106 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.7233121986925948e-06, "aux_brier/mean_r": 0.9619654640555382, "aux_brier/n_active_tok": 203.125, "aux_brier/n_step_records": 50.78125, "aux_brier/std_r": 0.07960818279389059, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5041942604856512, "calib/avg_num_step_conf": 6.390625, "calib/ece": 0.49173828125000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005017344686218811, "calib/mean_conf": 0.09810546875000001, "calib/mu_c": 0.09831125827814569, "calib/mu_w": 0.0978095238095238, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.038725100485250824, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 392.1796875, "completions/mean_terminated_length": 393.7176818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.11413333333333334, "grad_norm": 0.9041869640350342, "learning_rate": 2.5833333333333337e-06, "loss": 0.0233, "num_tokens": 23200168.0, "reward": 1.216104507446289, "reward_std": 0.22070017457008362, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5128557682037354, "rewards/format_reward_step": 0.99609375, "step": 107 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.099712111485296e-06, "aux_brier/mean_r": 0.9625500831753016, "aux_brier/n_active_tok": 206.5, "aux_brier/n_step_records": 51.625, "aux_brier/std_r": 0.08628429169039009, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.519775641025641, "calib/avg_num_step_conf": 6.59765625, "calib/ece": 0.4774133858267717, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001210256410256419, "calib/mean_conf": 0.11313779527559056, "calib/mu_c": 0.11363333333333335, "calib/mu_w": 0.11242307692307693, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.044309584518234434, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 422.6171875, "completions/mean_terminated_length": 424.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.1152, "grad_norm": 0.5872453451156616, "learning_rate": 2.5555555555555557e-06, "loss": -0.0107, "num_tokens": 23411590.0, "reward": 1.2171289920806885, "reward_std": 0.24834440648555756, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5247659087181091, "rewards/format_reward_step": 0.9921875, "step": 108 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.118077545687491e-06, "aux_brier/mean_r": 0.9741557370871305, "aux_brier/n_active_tok": 238.75, "aux_brier/n_step_records": 59.6875, "aux_brier/std_r": 0.059286125950166024, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5429352068696331, "calib/avg_num_step_conf": 7.73046875, "calib/ece": 0.3932725806451613, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002741451990632332, "calib/mean_conf": 0.11543709677419357, "calib/mu_c": 0.11404426229508198, "calib/mu_w": 0.11678571428571431, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00838709677419355, "calib/std_conf": 0.046235695203231446, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2604.0, "completions/max_terminated_length": 2604.0, "completions/mean_length": 470.63671875, "completions/mean_terminated_length": 478.107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.11626666666666667, "grad_norm": 0.24365048110485077, "learning_rate": 2.5277777777777778e-06, "loss": 0.0144, "num_tokens": 23636673.0, "reward": 1.1074140071868896, "reward_std": 0.15337789058685303, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5859057307243347, "rewards/format_reward_step": 0.96875, "step": 109 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.6531132100983115e-06, "aux_brier/mean_r": 0.9645422715693712, "aux_brier/n_active_tok": 186.5, "aux_brier/n_step_records": 46.625, "aux_brier/std_r": 0.07064221261680359, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5730846144231971, "calib/avg_num_step_conf": 5.875, "calib/ece": 0.38774110671936757, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008528058992625903, "calib/mean_conf": 0.11028260869565219, "calib/mu_c": 0.11456349206349208, "calib/mu_w": 0.10603543307086617, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04433911020295698, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2529.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 410.0625, "completions/mean_terminated_length": 411.6706237792969, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.11733333333333333, "grad_norm": 0.929890513420105, "learning_rate": 2.5e-06, "loss": 0.0519, "num_tokens": 23846569.0, "reward": 1.135054349899292, "reward_std": 0.29901665449142456, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5949045419692993, "rewards/format_reward_step": 0.98828125, "step": 110 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.971097242739146e-07, "aux_brier/mean_r": 0.9534461908042431, "aux_brier/n_active_tok": 213.875, "aux_brier/n_step_records": 53.46875, "aux_brier/std_r": 0.09113074054766912, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4310224089635854, "calib/avg_num_step_conf": 6.9375, "calib/ece": 0.40815537848605576, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0287392411510059, "calib/mean_conf": 0.13877689243027888, "calib/mu_c": 0.12515151515151515, "calib/mu_w": 0.15389075630252105, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01051792828685259, "calib/std_conf": 0.07677393487116559, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 459.984375, "completions/mean_terminated_length": 463.6062927246094, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1184, "grad_norm": 0.3018098473548889, "learning_rate": 2.4722222222222226e-06, "loss": 0.0499, "num_tokens": 24071733.0, "reward": 1.1431818008422852, "reward_std": 0.2569875717163086, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5649149417877197, "rewards/format_reward_step": 0.97265625, "step": 111 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.5166348797955873e-07, "aux_brier/mean_r": 0.9389750920236111, "aux_brier/n_active_tok": 206.875, "aux_brier/n_step_records": 51.71875, "aux_brier/std_r": 0.11770817166689085, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6010658914728682, "calib/avg_num_step_conf": 6.86328125, "calib/ece": 0.3773734939759036, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015600387596899218, "calib/mean_conf": 0.1406987951807229, "calib/mu_c": 0.1482170542635659, "calib/mu_w": 0.1326166666666667, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.06453119197337234, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 457.41015625, "completions/mean_terminated_length": 462.8340148925781, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.11946666666666667, "grad_norm": 1.4498605728149414, "learning_rate": 2.4444444444444447e-06, "loss": -0.0034, "num_tokens": 24296750.0, "reward": 1.1367487907409668, "reward_std": 0.27346163988113403, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5938701033592224, "rewards/format_reward_step": 0.96875, "step": 112 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1186343113210881e-07, "aux_brier/mean_r": 0.9530810378491879, "aux_brier/n_active_tok": 213.75, "aux_brier/n_step_records": 53.4375, "aux_brier/std_r": 0.09911039125290699, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5730155774840191, "calib/avg_num_step_conf": 6.72265625, "calib/ece": 0.3657086614173228, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01889344007943894, "calib/mean_conf": 0.15161417322834647, "calib/mu_c": 0.16076335877862594, "calib/mu_w": 0.141869918699187, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007874015748031494, "calib/std_conf": 0.06573593646116775, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 407.609375, "completions/mean_terminated_length": 409.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.12053333333333334, "grad_norm": 0.08281253278255463, "learning_rate": 2.4166666666666667e-06, "loss": 0.0149, "num_tokens": 24506298.0, "reward": 1.1622889041900635, "reward_std": 0.24908068776130676, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6179052591323853, "rewards/format_reward_step": 0.9921875, "step": 113 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.571375050836494e-07, "aux_brier/mean_r": 0.9518765844404697, "aux_brier/n_active_tok": 209.0, "aux_brier/n_step_records": 52.25, "aux_brier/std_r": 0.08919659565435722, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48012173012173015, "calib/avg_num_step_conf": 6.62109375, "calib/ece": 0.47306078431372556, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014660353535353554, "calib/mean_conf": 0.15023333333333333, "calib/mu_c": 0.14454166666666668, "calib/mu_w": 0.15920202020202023, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005764705882352941, "calib/std_conf": 0.08733099907073553, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2707.0, "completions/max_terminated_length": 2707.0, "completions/mean_length": 412.65625, "completions/mean_terminated_length": 412.65625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.1216, "grad_norm": 0.1044851541519165, "learning_rate": 2.388888888888889e-06, "loss": 0.0548, "num_tokens": 24716962.0, "reward": 1.2406219244003296, "reward_std": 0.22638444602489471, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.5328000783920288, "rewards/format_reward_step": 0.99609375, "step": 114 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.011089377835674e-07, "aux_brier/mean_r": 0.9587736297398806, "aux_brier/n_active_tok": 196.0, "aux_brier/n_step_records": 49.0, "aux_brier/std_r": 0.0770155581558356, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5316535433070866, "calib/avg_num_step_conf": 6.14453125, "calib/ece": 0.3045238095238096, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.016079370078740163, "calib/mean_conf": 0.19293650793650796, "calib/mu_c": 0.20104, "calib/mu_w": 0.18496062992125983, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0007142857142857148, "calib/std_conf": 0.11020142064245368, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3058.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 407.12890625, "completions/mean_terminated_length": 408.72552490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.12266666666666666, "grad_norm": 0.09419503808021545, "learning_rate": 2.361111111111111e-06, "loss": 0.0633, "num_tokens": 24926451.0, "reward": 1.1358131170272827, "reward_std": 0.2533056437969208, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6370025277137756, "rewards/format_reward_step": 0.9765625, "step": 115 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.5253489710497234e-07, "aux_brier/mean_r": 0.9548651613295078, "aux_brier/n_active_tok": 222.5, "aux_brier/n_step_records": 55.625, "aux_brier/std_r": 0.07094483741093427, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5422413793103449, "calib/avg_num_step_conf": 7.2578125, "calib/ece": 0.3584189723320158, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0227292464878672, "calib/mean_conf": 0.2239525691699605, "calib/mu_c": 0.23365517241379313, "calib/mu_w": 0.21092592592592593, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004624505928853755, "calib/std_conf": 0.12984100083314182, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 459.1484375, "completions/mean_terminated_length": 462.7637634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.12373333333333333, "grad_norm": 0.10471275448799133, "learning_rate": 2.3333333333333336e-06, "loss": -0.0039, "num_tokens": 25148513.0, "reward": 1.2133257389068604, "reward_std": 0.23516668379306793, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.618928074836731, "rewards/format_reward_step": 0.984375, "step": 116 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.8026882203935344e-07, "aux_brier/mean_r": 0.9457707889378071, "aux_brier/n_active_tok": 212.0, "aux_brier/n_step_records": 53.0, "aux_brier/std_r": 0.08542424446204677, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5064516129032257, "calib/avg_num_step_conf": 7.203125, "calib/ece": 0.1695486055776892, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.0043745430107527405, "calib/mean_conf": 0.23969442231075697, "calib/mu_c": 0.24239583333333337, "calib/mu_w": 0.23802129032258063, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013386454183266934, "calib/std_conf": 0.12561702456800855, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2788.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 419.0546875, "completions/mean_terminated_length": 425.70635986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.1248, "grad_norm": 0.07769742608070374, "learning_rate": 2.305555555555556e-06, "loss": -0.025, "num_tokens": 25362391.0, "reward": 1.041290044784546, "reward_std": 0.3035169839859009, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.7120351791381836, "rewards/format_reward_step": 0.9765625, "step": 117 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1297898944162554e-07, "aux_brier/mean_r": 0.9387212097644806, "aux_brier/n_active_tok": 224.625, "aux_brier/n_step_records": 56.15625, "aux_brier/std_r": 0.09899964329088107, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48184617358393744, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.3282188, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.019473622426069648, "calib/mean_conf": 0.25194120000000003, "calib/mu_c": 0.24267175572519084, "calib/mu_w": 0.2621453781512605, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.028080000000000008, "calib/std_conf": 0.16203874876880528, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1644.0, "completions/max_terminated_length": 1644.0, "completions/mean_length": 424.3125, "completions/mean_terminated_length": 431.0476379394531, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.12586666666666665, "grad_norm": 0.1443454772233963, "learning_rate": 2.277777777777778e-06, "loss": -0.0635, "num_tokens": 25575023.0, "reward": 1.1563937664031982, "reward_std": 0.27992963790893555, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6255753040313721, "rewards/format_reward_step": 0.9765625, "step": 118 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.348430286826304e-08, "aux_brier/mean_r": 0.928545979782939, "aux_brier/n_active_tok": 202.25, "aux_brier/n_step_records": 50.5625, "aux_brier/std_r": 0.10552759381243959, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5303793328973185, "calib/avg_num_step_conf": 6.421875, "calib/ece": 0.2990763052208835, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.015157619359058239, "calib/mean_conf": 0.2671887550200803, "calib/mu_c": 0.273884892086331, "calib/mu_w": 0.25872727272727275, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004016064257028112, "calib/std_conf": 0.143208109983746, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2854.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 479.15234375, "completions/mean_terminated_length": 484.8340148925781, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.12693333333333334, "grad_norm": 0.1274876594543457, "learning_rate": 2.25e-06, "loss": 0.0035, "num_tokens": 25802750.0, "reward": 1.1830272674560547, "reward_std": 0.2851863503456116, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6305460929870605, "rewards/format_reward_step": 0.96484375, "step": 119 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.848828588117527e-08, "aux_brier/mean_r": 0.9395423028618097, "aux_brier/n_active_tok": 196.875, "aux_brier/n_step_records": 49.21875, "aux_brier/std_r": 0.09219324978766963, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4797689646651841, "calib/avg_num_step_conf": 6.1796875, "calib/ece": 0.29363281249999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008479120336051416, "calib/mean_conf": 0.2822265625, "calib/mu_c": 0.2784507042253522, "calib/mu_w": 0.2869298245614036, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.010585937499999996, "calib/std_conf": 0.13133376581988956, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 399.3984375, "completions/mean_terminated_length": 400.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.128, "grad_norm": 0.22246916592121124, "learning_rate": 2.222222222222222e-06, "loss": 0.0094, "num_tokens": 26011684.0, "reward": 1.2160890102386475, "reward_std": 0.2542319595813751, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6534183025360107, "rewards/format_reward_step": 0.99609375, "step": 120 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.904218475765276e-09, "aux_brier/mean_r": 0.919608561322093, "aux_brier/n_active_tok": 219.625, "aux_brier/n_step_records": 54.90625, "aux_brier/std_r": 0.11453270737547427, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5790712468193384, "calib/avg_num_step_conf": 7.08984375, "calib/ece": 0.19111553784860558, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.045514529262086545, "calib/mean_conf": 0.3282820717131474, "calib/mu_c": 0.3520366666666667, "calib/mu_w": 0.3065221374045802, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02065498007968128, "calib/std_conf": 0.18741276801443255, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 460.58203125, "completions/mean_terminated_length": 466.0434875488281, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.12906666666666666, "grad_norm": 0.20854102075099945, "learning_rate": 2.1944444444444445e-06, "loss": -0.0333, "num_tokens": 26234649.0, "reward": 1.131608486175537, "reward_std": 0.3270259201526642, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6983091831207275, "rewards/format_reward_step": 0.9765625, "step": 121 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.3580222608944315e-07, "aux_brier/mean_r": 0.9207999929785728, "aux_brier/n_active_tok": 213.25, "aux_brier/n_step_records": 53.3125, "aux_brier/std_r": 0.10475145361851901, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6422525200310157, "calib/avg_num_step_conf": 6.984375, "calib/ece": 0.27222380952380953, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.06454241406048072, "calib/mean_conf": 0.32182380952380957, "calib/mu_c": 0.348972602739726, "calib/mu_w": 0.2844301886792453, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.007341269841269841, "calib/std_conf": 0.15098661689277507, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2416.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 416.7421875, "completions/mean_terminated_length": 418.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.13013333333333332, "grad_norm": 0.0847122073173523, "learning_rate": 2.166666666666667e-06, "loss": 0.0377, "num_tokens": 26448679.0, "reward": 1.2359998226165771, "reward_std": 0.24089466035366058, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.686186671257019, "rewards/format_reward_step": 0.98046875, "step": 122 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.972941983875771e-08, "aux_brier/mean_r": 0.8995146565139294, "aux_brier/n_active_tok": 221.25, "aux_brier/n_step_records": 55.3125, "aux_brier/std_r": 0.12771078263176605, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47579493835171965, "calib/avg_num_step_conf": 7.09375, "calib/ece": 0.2025301204819277, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": -0.016073329007138204, "calib/mean_conf": 0.35160642570281125, "calib/mu_c": 0.34295652173913044, "calib/mu_w": 0.35902985074626864, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.046144578313253026, "calib/std_conf": 0.19232834029544033, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2124.0, "completions/max_terminated_length": 2124.0, "completions/mean_length": 490.41015625, "completions/mean_terminated_length": 494.2716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.1312, "grad_norm": 0.05073205381631851, "learning_rate": 2.138888888888889e-06, "loss": 0.0109, "num_tokens": 26679512.0, "reward": 1.0989124774932861, "reward_std": 0.31424033641815186, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6690871119499207, "rewards/format_reward_step": 0.96484375, "step": 123 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.925066599854478e-08, "aux_brier/mean_r": 0.9220362659543753, "aux_brier/n_active_tok": 207.375, "aux_brier/n_step_records": 51.84375, "aux_brier/std_r": 0.10577379714231938, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4525448345541804, "calib/avg_num_step_conf": 6.546875, "calib/ece": 0.3172352941176471, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": -0.03454723415003791, "calib/mean_conf": 0.3455098039215686, "calib/mu_c": 0.3310135135135135, "calib/mu_w": 0.3655607476635514, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04117647058823531, "calib/std_conf": 0.16270246783972891, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 442.12890625, "completions/mean_terminated_length": 443.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.13226666666666667, "grad_norm": 0.27319470047950745, "learning_rate": 2.1111111111111114e-06, "loss": 0.0268, "num_tokens": 26899513.0, "reward": 1.243934154510498, "reward_std": 0.23813574016094208, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6554237604141235, "rewards/format_reward_step": 0.99609375, "step": 124 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.6478172820976198e-08, "aux_brier/mean_r": 0.880499541759491, "aux_brier/n_active_tok": 203.875, "aux_brier/n_step_records": 50.96875, "aux_brier/std_r": 0.14444965438451618, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4911504424778761, "calib/avg_num_step_conf": 6.625, "calib/ece": 0.17086152610441765, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": -0.0063922917751171315, "calib/mean_conf": 0.3800621686746988, "calib/mu_c": 0.376570796460177, "calib/mu_w": 0.3829630882352941, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.048554216867469885, "calib/std_conf": 0.19724339864989018, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 478.03125, "completions/mean_terminated_length": 481.7952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.13333333333333333, "grad_norm": 0.2627008855342865, "learning_rate": 2.0833333333333334e-06, "loss": 0.0082, "num_tokens": 27126697.0, "reward": 1.091411828994751, "reward_std": 0.29050981998443604, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6781472563743591, "rewards/format_reward_step": 0.9609375, "step": 125 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8760992945288102e-07, "aux_brier/mean_r": 0.907262934371829, "aux_brier/n_active_tok": 220.625, "aux_brier/n_step_records": 55.15625, "aux_brier/std_r": 0.11587508826050907, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6095294706544863, "calib/avg_num_step_conf": 7.06640625, "calib/ece": 0.16498023715415017, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.05830121386559883, "calib/mean_conf": 0.3815810276679843, "calib/mu_c": 0.40969465648854964, "calib/mu_w": 0.3513934426229508, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014387351778656125, "calib/std_conf": 0.16757474397389432, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2576.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 451.58984375, "completions/mean_terminated_length": 453.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.1344, "grad_norm": 0.28045162558555603, "learning_rate": 2.0555555555555555e-06, "loss": -0.0043, "num_tokens": 27347768.0, "reward": 1.1840758323669434, "reward_std": 0.27042731642723083, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7206785082817078, "rewards/format_reward_step": 0.984375, "step": 126 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.482128341751988e-08, "aux_brier/mean_r": 0.9151476826518774, "aux_brier/n_active_tok": 222.75, "aux_brier/n_step_records": 55.6875, "aux_brier/std_r": 0.10804212570656091, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5121724646037542, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.199314, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.012, "calib/gap": -0.004110545198283022, "calib/mean_conf": 0.35652600000000007, "calib/mu_c": 0.35440495867768596, "calib/mu_w": 0.358515503875969, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03592000000000002, "calib/std_conf": 0.1663230240345575, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2832.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 438.4609375, "completions/mean_terminated_length": 443.66009521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.13546666666666668, "grad_norm": 0.2871302366256714, "learning_rate": 2.027777777777778e-06, "loss": 0.0481, "num_tokens": 27563686.0, "reward": 1.1367894411087036, "reward_std": 0.2733447253704071, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6877830624580383, "rewards/format_reward_step": 0.9765625, "step": 127 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.683733678152336e-08, "aux_brier/mean_r": 0.9009036216884851, "aux_brier/n_active_tok": 204.0, "aux_brier/n_step_records": 51.0, "aux_brier/std_r": 0.12476722081191838, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.515438733632988, "calib/avg_num_step_conf": 6.5234375, "calib/ece": 0.18460806451612904, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.016129032258064516, "calib/gap": 0.009849599374633644, "calib/mean_conf": 0.38434354838709683, "calib/mu_c": 0.38906976744186045, "calib/mu_w": 0.3792201680672268, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.024395161290322577, "calib/std_conf": 0.16708054086516494, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 474.2265625, "completions/mean_terminated_length": 477.96063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.13653333333333334, "grad_norm": 0.10523315519094467, "learning_rate": 2.0000000000000003e-06, "loss": 0.0545, "num_tokens": 27791752.0, "reward": 1.1542980670928955, "reward_std": 0.30696868896484375, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6796922087669373, "rewards/format_reward_step": 0.9609375, "step": 128 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1682828749207119e-07, "aux_brier/mean_r": 0.8982313759624958, "aux_brier/n_active_tok": 208.5, "aux_brier/n_step_records": 52.125, "aux_brier/std_r": 0.1205826576333493, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5295098039215687, "calib/avg_num_step_conf": 6.5234375, "calib/ece": 0.24430830039525697, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.017050326797385607, "calib/mean_conf": 0.4024110671936759, "calib/mu_c": 0.4091503267973856, "calib/mu_w": 0.3921, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020988142292490117, "calib/std_conf": 0.1788879708716506, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2589.0, "completions/max_terminated_length": 2589.0, "completions/mean_length": 421.234375, "completions/mean_terminated_length": 422.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.1376, "grad_norm": 0.08267068862915039, "learning_rate": 1.9722222222222224e-06, "loss": 0.0427, "num_tokens": 28001972.0, "reward": 1.2638030052185059, "reward_std": 0.23557443916797638, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.688024640083313, "rewards/format_reward_step": 0.98828125, "step": 129 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.9647965827116494e-08, "aux_brier/mean_r": 0.8863687589764595, "aux_brier/n_active_tok": 208.0, "aux_brier/n_step_records": 52.0, "aux_brier/std_r": 0.14508607238531113, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5415671735578736, "calib/avg_num_step_conf": 6.60546875, "calib/ece": 0.24348862745098035, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": 0.019205957018976927, "calib/mean_conf": 0.3954917647058824, "calib/mu_c": 0.40370136986301375, "calib/mu_w": 0.3844954128440368, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0332156862745098, "calib/std_conf": 0.1962376010091516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 395.33984375, "completions/mean_terminated_length": 396.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.13866666666666666, "grad_norm": 0.0783396065235138, "learning_rate": 1.944444444444445e-06, "loss": -0.012, "num_tokens": 28208467.0, "reward": 1.245288610458374, "reward_std": 0.22594107687473297, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6920915842056274, "rewards/format_reward_step": 0.99609375, "step": 130 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.633915508065137e-08, "aux_brier/mean_r": 0.888754416257143, "aux_brier/n_active_tok": 198.0, "aux_brier/n_step_records": 49.5, "aux_brier/std_r": 0.1411213732790202, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.617965367965368, "calib/avg_num_step_conf": 6.37890625, "calib/ece": 0.0931796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.07048051948051959, "calib/mean_conf": 0.4176015625, "calib/mu_c": 0.46, "calib/mu_w": 0.38951948051948043, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05617187500000002, "calib/std_conf": 0.19151764441575247, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1827.0, "completions/max_terminated_length": 1827.0, "completions/mean_length": 394.60546875, "completions/mean_terminated_length": 396.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.13973333333333332, "grad_norm": 0.050378166139125824, "learning_rate": 1.916666666666667e-06, "loss": 0.0227, "num_tokens": 28415694.0, "reward": 1.0798051357269287, "reward_std": 0.19457346200942993, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.7489080429077148, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.26369157044715e-08, "aux_brier/mean_r": 0.8635993953794241, "aux_brier/n_active_tok": 211.125, "aux_brier/n_step_records": 52.78125, "aux_brier/std_r": 0.1567929598968476, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4409935897435897, "calib/avg_num_step_conf": 6.6953125, "calib/ece": 0.26063908386230467, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": -0.040887022235577, "calib/mean_conf": 0.4440484161376954, "calib/mu_c": 0.4280769230769231, "calib/mu_w": 0.4689639453125001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04765625, "calib/std_conf": 0.19377898635015073, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 405.3515625, "completions/mean_terminated_length": 406.9411926269531, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.1408, "grad_norm": 0.09182775765657425, "learning_rate": 1.888888888888889e-06, "loss": 0.0135, "num_tokens": 28625056.0, "reward": 1.2787785530090332, "reward_std": 0.24889016151428223, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6776144504547119, "rewards/format_reward_step": 1.0, "step": 132 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.4734078885610984e-08, "aux_brier/mean_r": 0.8646962121129036, "aux_brier/n_active_tok": 227.0, "aux_brier/n_step_records": 56.75, "aux_brier/std_r": 0.16395573201589286, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5164760348583878, "calib/avg_num_step_conf": 7.62109375, "calib/ece": 0.18841526104417672, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.05622489959839357, "calib/gap": 0.0016290441176470272, "calib/mean_conf": 0.4535927710843374, "calib/mu_c": 0.45459374999999996, "calib/mu_w": 0.45296470588235294, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1282329317269076, "calib/std_conf": 0.21843736263177577, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 487.44921875, "completions/mean_terminated_length": 497.15936279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.14186666666666667, "grad_norm": 0.10583373159170151, "learning_rate": 1.8611111111111113e-06, "loss": 0.0108, "num_tokens": 28856187.0, "reward": 1.028933048248291, "reward_std": 0.37068942189216614, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6860446929931641, "rewards/format_reward_step": 0.96484375, "step": 133 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.99102370385718e-09, "aux_brier/mean_r": 0.8611395936459303, "aux_brier/n_active_tok": 221.625, "aux_brier/n_step_records": 55.40625, "aux_brier/std_r": 0.16742704529315233, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4701492537313433, "calib/avg_num_step_conf": 7.13671875, "calib/ece": 0.19036086956521736, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.039525691699604744, "calib/gap": -0.023781650570676105, "calib/mean_conf": 0.43343359683794463, "calib/mu_c": 0.42224776119402985, "calib/mu_w": 0.44602941176470595, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.047075098814229235, "calib/std_conf": 0.21354872836213862, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2367.0, "completions/max_terminated_length": 2367.0, "completions/mean_length": 494.671875, "completions/mean_terminated_length": 494.671875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.14293333333333333, "grad_norm": 0.12384790182113647, "learning_rate": 1.8333333333333333e-06, "loss": 0.0724, "num_tokens": 29091775.0, "reward": 1.183688998222351, "reward_std": 0.35327237844467163, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6722561120986938, "rewards/format_reward_step": 0.984375, "step": 134 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.770893714094008e-08, "aux_brier/mean_r": 0.8544477913528681, "aux_brier/n_active_tok": 224.75, "aux_brier/n_step_records": 56.1875, "aux_brier/std_r": 0.17348002130165696, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5098343030609663, "calib/avg_num_step_conf": 7.3515625, "calib/ece": 0.1913301587301587, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03571428571428571, "calib/gap": 0.00045906906147230586, "calib/mean_conf": 0.44716190476190476, "calib/mu_c": 0.4473768656716418, "calib/mu_w": 0.4469177966101695, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05337301587301588, "calib/std_conf": 0.2107196854836779, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 470.65625, "completions/mean_terminated_length": 472.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.144, "grad_norm": 0.08482138067483902, "learning_rate": 1.8055555555555557e-06, "loss": 0.0304, "num_tokens": 29318143.0, "reward": 1.1878116130828857, "reward_std": 0.31839847564697266, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6887466907501221, "rewards/format_reward_step": 0.984375, "step": 135 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.606758112288503e-09, "aux_brier/mean_r": 0.8612100519239902, "aux_brier/n_active_tok": 235.0, "aux_brier/n_step_records": 58.75, "aux_brier/std_r": 0.15901895437855273, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47738951695786225, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.16964741035856576, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.015089157245632034, "calib/mean_conf": 0.48924900398406374, "calib/mu_c": 0.4808928571428571, "calib/mu_w": 0.49598201438848916, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10634063745019923, "calib/std_conf": 0.18945002901827143, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2170.0, "completions/max_terminated_length": 2170.0, "completions/mean_length": 442.34375, "completions/mean_terminated_length": 445.8267822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.14506666666666668, "grad_norm": 0.03690337762236595, "learning_rate": 1.777777777777778e-06, "loss": -0.0317, "num_tokens": 29539871.0, "reward": 1.1051082611083984, "reward_std": 0.26446324586868286, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6938701868057251, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.798647643277647e-09, "aux_brier/mean_r": 0.855107743293047, "aux_brier/n_active_tok": 225.875, "aux_brier/n_step_records": 56.46875, "aux_brier/std_r": 0.15921062580309808, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5522699004975125, "calib/avg_num_step_conf": 7.20703125, "calib/ece": 0.11597047244094486, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.02715677860696525, "calib/mean_conf": 0.4966476377952756, "calib/mu_c": 0.5094776119402986, "calib/mu_w": 0.48232083333333337, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.042529527559055105, "calib/std_conf": 0.1831278727283494, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 453.9140625, "completions/mean_terminated_length": 455.69415283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.14613333333333334, "grad_norm": 0.04309280961751938, "learning_rate": 1.75e-06, "loss": -0.0056, "num_tokens": 29763057.0, "reward": 1.2005572319030762, "reward_std": 0.33588626980781555, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7241036891937256, "rewards/format_reward_step": 0.9921875, "step": 137 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.0094727063980624e-07, "aux_brier/mean_r": 0.8415694739669561, "aux_brier/n_active_tok": 224.25, "aux_brier/n_step_records": 56.0625, "aux_brier/std_r": 0.16858775913715363, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4650232318017553, "calib/avg_num_step_conf": 7.45703125, "calib/ece": 0.1920541501976285, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03557312252964427, "calib/gap": -0.028633763551884295, "calib/mean_conf": 0.5221750988142292, "calib/mu_c": 0.5104046979865773, "calib/mu_w": 0.5390384615384616, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06264822134387354, "calib/std_conf": 0.20784797132111368, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 446.45703125, "completions/mean_terminated_length": 446.45703125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.1472, "grad_norm": 0.008997076191008091, "learning_rate": 1.7222222222222224e-06, "loss": -0.0032, "num_tokens": 29981686.0, "reward": 1.2454711198806763, "reward_std": 0.3116518259048462, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.685009777545929, "rewards/format_reward_step": 0.984375, "step": 138 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.5731516884066634e-08, "aux_brier/mean_r": 0.866764098405838, "aux_brier/n_active_tok": 201.75, "aux_brier/n_step_records": 50.4375, "aux_brier/std_r": 0.1419970259303227, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5766108891108891, "calib/avg_num_step_conf": 6.32421875, "calib/ece": 0.13423098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.046206150099900145, "calib/mean_conf": 0.4853768627450981, "calib/mu_c": 0.5056713286713287, "calib/mu_w": 0.45946517857142855, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.029411764705882353, "calib/std_conf": 0.18573747859509537, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 407.8984375, "completions/mean_terminated_length": 407.8984375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.14826666666666666, "grad_norm": 0.019953807815909386, "learning_rate": 1.6944444444444446e-06, "loss": 0.0184, "num_tokens": 30189204.0, "reward": 1.2343586683273315, "reward_std": 0.29369819164276123, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7264971733093262, "rewards/format_reward_step": 0.98828125, "step": 139 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.680944515860318e-08, "aux_brier/mean_r": 0.8431448042392731, "aux_brier/n_active_tok": 215.125, "aux_brier/n_step_records": 53.78125, "aux_brier/std_r": 0.17906808014959097, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47558922558922556, "calib/avg_num_step_conf": 6.8125, "calib/ece": 0.19137843137254903, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": -0.008792637917637847, "calib/mean_conf": 0.49364117647058825, "calib/mu_c": 0.4902275641025641, "calib/mu_w": 0.4990202020202019, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.036627450980392134, "calib/std_conf": 0.21302567660433228, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 440.4921875, "completions/mean_terminated_length": 442.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.14933333333333335, "grad_norm": 0.03961111977696419, "learning_rate": 1.6666666666666667e-06, "loss": -0.0209, "num_tokens": 30406986.0, "reward": 1.281484603881836, "reward_std": 0.31075361371040344, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6962511539459229, "rewards/format_reward_step": 0.99609375, "step": 140 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.8728322965253525e-08, "aux_brier/mean_r": 0.834965918213129, "aux_brier/n_active_tok": 203.875, "aux_brier/n_step_records": 50.96875, "aux_brier/std_r": 0.18984848959371448, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5758879113717823, "calib/avg_num_step_conf": 6.60546875, "calib/ece": 0.17545157480314968, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07874015748031496, "calib/gap": 0.05084051482567603, "calib/mean_conf": 0.5069893700787402, "calib/mu_c": 0.5268051612903225, "calib/mu_w": 0.4759646464646465, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.036102362204724424, "calib/std_conf": 0.21634318720677176, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 442.4921875, "completions/mean_terminated_length": 445.97637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.1504, "grad_norm": 0.04783289507031441, "learning_rate": 1.638888888888889e-06, "loss": -0.0162, "num_tokens": 30627360.0, "reward": 1.2777183055877686, "reward_std": 0.3144700527191162, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7202483415603638, "rewards/format_reward_step": 0.984375, "step": 141 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.755059033161423e-08, "aux_brier/mean_r": 0.8158276583999395, "aux_brier/n_active_tok": 228.75, "aux_brier/n_step_records": 57.1875, "aux_brier/std_r": 0.19122428877744824, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.45670807453416146, "calib/avg_num_step_conf": 7.1953125, "calib/ece": 0.18248039215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0784313725490196, "calib/gap": -0.03583757763975137, "calib/mean_conf": 0.5617549019607844, "calib/mu_c": 0.5455928571428572, "calib/mu_w": 0.5814304347826086, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09760784313725493, "calib/std_conf": 0.21130024573700762, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2377.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 465.90234375, "completions/mean_terminated_length": 465.90234375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.15146666666666667, "grad_norm": 0.02992737479507923, "learning_rate": 1.6111111111111113e-06, "loss": 0.0106, "num_tokens": 30851791.0, "reward": 1.2167097330093384, "reward_std": 0.2924765348434448, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6871516704559326, "rewards/format_reward_step": 0.99609375, "step": 142 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.07010374464867e-08, "aux_brier/mean_r": 0.8592647183686495, "aux_brier/n_active_tok": 217.0, "aux_brier/n_step_records": 54.25, "aux_brier/std_r": 0.14784136065281928, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5375318461442864, "calib/avg_num_step_conf": 6.96484375, "calib/ece": 0.12099960629921258, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.021490778599391025, "calib/mean_conf": 0.5034885826771653, "calib/mu_c": 0.5137263157894737, "calib/mu_w": 0.4922355371900827, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05043307086614174, "calib/std_conf": 0.1865499698407842, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 451.6953125, "completions/mean_terminated_length": 451.6953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.15253333333333333, "grad_norm": 0.023315150290727615, "learning_rate": 1.5833333333333333e-06, "loss": 0.0414, "num_tokens": 31074761.0, "reward": 1.1957252025604248, "reward_std": 0.2707880139350891, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7204006910324097, "rewards/format_reward_step": 0.9921875, "step": 143 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.952828720952482e-08, "aux_brier/mean_r": 0.820482898503542, "aux_brier/n_active_tok": 215.25, "aux_brier/n_step_records": 53.8125, "aux_brier/std_r": 0.18170885206200182, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49164983164983167, "calib/avg_num_step_conf": 6.97265625, "calib/ece": 0.22038235294117642, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.06274509803921569, "calib/gap": 0.0008270707070706651, "calib/mean_conf": 0.5251862745098039, "calib/mu_c": 0.5254781818181818, "calib/mu_w": 0.5246511111111112, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0492549019607843, "calib/std_conf": 0.22322515057857306, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 452.41015625, "completions/mean_terminated_length": 452.41015625, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.1536, "grad_norm": 0.024755064398050308, "learning_rate": 1.5555555555555558e-06, "loss": -0.0216, "num_tokens": 31294706.0, "reward": 1.3187178373336792, "reward_std": 0.226515993475914, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7045587301254272, "rewards/format_reward_step": 0.99609375, "step": 144 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.076441402154572e-08, "aux_brier/mean_r": 0.8027276508510113, "aux_brier/n_active_tok": 219.0, "aux_brier/n_step_records": 54.75, "aux_brier/std_r": 0.19599402940366417, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48295861844248944, "calib/avg_num_step_conf": 7.109375, "calib/ece": 0.19662244094488193, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": -0.009343023786249405, "calib/mean_conf": 0.5370783464566928, "calib/mu_c": 0.5334367741935485, "calib/mu_w": 0.5427797979797979, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.061732283464566926, "calib/std_conf": 0.2230492077951706, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2651.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 442.28515625, "completions/mean_terminated_length": 442.28515625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.15466666666666667, "grad_norm": 0.028061792254447937, "learning_rate": 1.527777777777778e-06, "loss": 0.0462, "num_tokens": 31510635.0, "reward": 1.2733901739120483, "reward_std": 0.3333938717842102, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6951233148574829, "rewards/format_reward_step": 0.98828125, "step": 145 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.299429776615348e-08, "aux_brier/mean_r": 0.8234522491693497, "aux_brier/n_active_tok": 214.5, "aux_brier/n_step_records": 53.625, "aux_brier/std_r": 0.18366397521458566, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5631024860476916, "calib/avg_num_step_conf": 6.9296875, "calib/ece": 0.15656746062992125, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": 0.04451860857432777, "calib/mean_conf": 0.5206698228346457, "calib/mu_c": 0.5462592592592593, "calib/mu_w": 0.5017406506849316, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12602021653543305, "calib/std_conf": 0.22569164079354553, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 444.71484375, "completions/mean_terminated_length": 446.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.15573333333333333, "grad_norm": 0.025134719908237457, "learning_rate": 1.5e-06, "loss": -0.0001, "num_tokens": 31731698.0, "reward": 1.0931053161621094, "reward_std": 0.30833274126052856, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.708358645439148, "rewards/format_reward_step": 0.98828125, "step": 146 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.911516463841249e-08, "aux_brier/mean_r": 0.8122192062437534, "aux_brier/n_active_tok": 216.5, "aux_brier/n_step_records": 54.125, "aux_brier/std_r": 0.1872737722005695, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5760673868312757, "calib/avg_num_step_conf": 6.81640625, "calib/ece": 0.17974920634920633, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.06349206349206349, "calib/gap": 0.050612962962962915, "calib/mean_conf": 0.5747746031746032, "calib/mu_c": 0.6036962962962963, "calib/mu_w": 0.5530833333333334, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16297619047619044, "calib/std_conf": 0.2062286354250713, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2591.0, "completions/max_terminated_length": 2591.0, "completions/mean_length": 460.56640625, "completions/mean_terminated_length": 464.1929016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.1568, "grad_norm": 0.0515858419239521, "learning_rate": 1.4722222222222225e-06, "loss": 0.0014, "num_tokens": 31953283.0, "reward": 1.0902622938156128, "reward_std": 0.24324165284633636, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.7047991752624512, "rewards/format_reward_step": 0.984375, "step": 147 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.7710615885245815e-09, "aux_brier/mean_r": 0.8038588259369135, "aux_brier/n_active_tok": 222.5, "aux_brier/n_step_records": 55.625, "aux_brier/std_r": 0.18959468672983348, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.481797385620915, "calib/avg_num_step_conf": 7.21875, "calib/ece": 0.1828735177865613, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07905138339920949, "calib/gap": -0.013629823529411778, "calib/mean_conf": 0.5986284584980237, "calib/mu_c": 0.5932411764705883, "calib/mu_w": 0.606871, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08837944664031622, "calib/std_conf": 0.20025337798810655, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2421.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 447.41796875, "completions/mean_terminated_length": 449.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.15786666666666666, "grad_norm": 0.05226176977157593, "learning_rate": 1.4444444444444445e-06, "loss": 0.0016, "num_tokens": 32172934.0, "reward": 1.2658051252365112, "reward_std": 0.31609511375427246, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7038455605506897, "rewards/format_reward_step": 0.984375, "step": 148 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.1584773019233126e-08, "aux_brier/mean_r": 0.7744041997939348, "aux_brier/n_active_tok": 228.75, "aux_brier/n_step_records": 57.1875, "aux_brier/std_r": 0.21435644826851785, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4474770938185572, "calib/avg_num_step_conf": 7.7890625, "calib/ece": 0.2526072289156627, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.07228915662650602, "calib/gap": -0.019915911730546076, "calib/mean_conf": 0.5614570281124499, "calib/mu_c": 0.5516190476190475, "calib/mu_w": 0.5715349593495935, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15402008032128514, "calib/std_conf": 0.2302332410010566, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2652.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 523.11328125, "completions/mean_terminated_length": 529.3162231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.15893333333333334, "grad_norm": 0.1554420441389084, "learning_rate": 1.4166666666666667e-06, "loss": 0.0156, "num_tokens": 32411307.0, "reward": 1.1419100761413574, "reward_std": 0.28279829025268555, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6613902449607849, "rewards/format_reward_step": 0.96875, "step": 149 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.219894197439068e-09, "aux_brier/mean_r": 0.8038320560008287, "aux_brier/n_active_tok": 205.375, "aux_brier/n_step_records": 51.34375, "aux_brier/std_r": 0.19461392844095826, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5002602641681306, "calib/avg_num_step_conf": 6.94921875, "calib/ece": 0.16590600000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.064, "calib/gap": -0.005148578306981566, "calib/mean_conf": 0.563454, "calib/mu_c": 0.561209219858156, "calib/mu_w": 0.5663577981651375, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08267999999999998, "calib/std_conf": 0.2013730291871282, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2373.0, "completions/max_terminated_length": 2373.0, "completions/mean_length": 439.04296875, "completions/mean_terminated_length": 442.5, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.16, "grad_norm": 0.0734696164727211, "learning_rate": 1.3888888888888892e-06, "loss": 0.002, "num_tokens": 32628662.0, "reward": 1.2126495838165283, "reward_std": 0.27915123105049133, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6943480968475342, "rewards/format_reward_step": 0.9765625, "step": 150 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.5514008961113888e-09, "aux_brier/mean_r": 0.7861856613308191, "aux_brier/n_active_tok": 219.0, "aux_brier/n_step_records": 54.75, "aux_brier/std_r": 0.20428610290400684, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4747511612475116, "calib/avg_num_step_conf": 7.78515625, "calib/ece": 0.2190323886639676, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.05668016194331984, "calib/gap": -0.018078666224286777, "calib/mean_conf": 0.5513319838056682, "calib/mu_c": 0.5413045454545454, "calib/mu_w": 0.5593832116788322, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.16251012145748986, "calib/std_conf": 0.21728345614212916, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 537.99609375, "completions/mean_terminated_length": 540.1058959960938, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.16106666666666666, "grad_norm": 0.11959611624479294, "learning_rate": 1.3611111111111112e-06, "loss": 0.0422, "num_tokens": 32873413.0, "reward": 1.0640509128570557, "reward_std": 0.3209001421928406, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.646828830242157, "rewards/format_reward_step": 0.9453125, "step": 151 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.702060511976633e-08, "aux_brier/mean_r": 0.8102346882224083, "aux_brier/n_active_tok": 223.125, "aux_brier/n_step_records": 55.78125, "aux_brier/std_r": 0.18743508565239608, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.431973667226023, "calib/avg_num_step_conf": 7.31640625, "calib/ece": 0.20082530120481928, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0321285140562249, "calib/gap": -0.04694078352910791, "calib/mean_conf": 0.5519457831325301, "calib/mu_c": 0.5280040983606558, "calib/mu_w": 0.5749448818897637, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13140562248995985, "calib/std_conf": 0.19334658954074496, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 466.2109375, "completions/mean_terminated_length": 469.88189697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.16213333333333332, "grad_norm": 0.10813432186841965, "learning_rate": 1.3333333333333334e-06, "loss": 0.0248, "num_tokens": 33098155.0, "reward": 1.1269245147705078, "reward_std": 0.3119822144508362, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6639482975006104, "rewards/format_reward_step": 0.96875, "step": 152 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.942694269485017e-08, "aux_brier/mean_r": 0.8132796436548233, "aux_brier/n_active_tok": 226.125, "aux_brier/n_step_records": 56.53125, "aux_brier/std_r": 0.18426152970641851, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.510104643557881, "calib/avg_num_step_conf": 7.45703125, "calib/ece": 0.15398072289156622, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.08433734939759036, "calib/gap": 0.004058862001308006, "calib/mean_conf": 0.6040112449799196, "calib/mu_c": 0.6058043165467626, "calib/mu_w": 0.6017454545454546, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09987951807228912, "calib/std_conf": 0.19901518991606024, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 455.08984375, "completions/mean_terminated_length": 464.1553955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.1632, "grad_norm": 0.14978143572807312, "learning_rate": 1.3055555555555556e-06, "loss": 0.0096, "num_tokens": 33321978.0, "reward": 1.202840805053711, "reward_std": 0.29197293519973755, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6941753625869751, "rewards/format_reward_step": 0.97265625, "step": 153 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0014349349773966e-08, "aux_brier/mean_r": 0.8017580434679985, "aux_brier/n_active_tok": 221.625, "aux_brier/n_step_records": 55.40625, "aux_brier/std_r": 0.19344893470406532, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5956202563145452, "calib/avg_num_step_conf": 6.96875, "calib/ece": 0.1741133333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.06666666666666667, "calib/gap": 0.058423236282194924, "calib/mean_conf": 0.6001611764705881, "calib/mu_c": 0.6324657894736843, "calib/mu_w": 0.5740425531914893, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16360784313725485, "calib/std_conf": 0.19478726943994779, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 453.81640625, "completions/mean_terminated_length": 453.81640625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.16426666666666667, "grad_norm": 0.03800573572516441, "learning_rate": 1.2777777777777779e-06, "loss": 0.0285, "num_tokens": 33542595.0, "reward": 1.1202408075332642, "reward_std": 0.33582866191864014, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.7153380513191223, "rewards/format_reward_step": 0.9921875, "step": 154 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.634872838304993e-08, "aux_brier/mean_r": 0.8097844570875168, "aux_brier/n_active_tok": 206.0, "aux_brier/n_step_records": 51.5, "aux_brier/std_r": 0.17847078037448227, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5928487978847691, "calib/avg_num_step_conf": 6.4921875, "calib/ece": 0.17345703125000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.078125, "calib/gap": 0.05238203283527032, "calib/mean_conf": 0.58669921875, "calib/mu_c": 0.6151410256410257, "calib/mu_w": 0.5627589928057554, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15156250000000002, "calib/std_conf": 0.18830996243564666, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 924.0, "completions/max_terminated_length": 924.0, "completions/mean_length": 404.1875, "completions/mean_terminated_length": 405.7725830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.16533333333333333, "grad_norm": 0.062165211886167526, "learning_rate": 1.25e-06, "loss": 0.0342, "num_tokens": 33753283.0, "reward": 1.1384236812591553, "reward_std": 0.24906030297279358, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.725569486618042, "rewards/format_reward_step": 1.0, "step": 155 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.007677693873557e-08, "aux_brier/mean_r": 0.7784804869443178, "aux_brier/n_active_tok": 219.875, "aux_brier/n_step_records": 54.96875, "aux_brier/std_r": 0.2090508914552629, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5300525920360631, "calib/avg_num_step_conf": 7.1171875, "calib/ece": 0.19717707509881421, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.06324110671936758, "calib/gap": 0.02168250688705231, "calib/mean_conf": 0.5817952569169962, "calib/mu_c": 0.5921651515151515, "calib/mu_w": 0.5704826446280992, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.12861660079051385, "calib/std_conf": 0.2393334609098778, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 459.0859375, "completions/mean_terminated_length": 460.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1664, "grad_norm": 0.01808425970375538, "learning_rate": 1.2222222222222223e-06, "loss": -0.0063, "num_tokens": 33975569.0, "reward": 1.1721856594085693, "reward_std": 0.29748740792274475, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6809302568435669, "rewards/format_reward_step": 0.97265625, "step": 156 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.992033432125396e-08, "aux_brier/mean_r": 0.7912592738866806, "aux_brier/n_active_tok": 223.25, "aux_brier/n_step_records": 55.8125, "aux_brier/std_r": 0.19565992313437164, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5183146766169154, "calib/avg_num_step_conf": 7.21484375, "calib/ece": 0.179605905511811, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07480314960629922, "calib/gap": 0.010377922885572155, "calib/mean_conf": 0.5900791338582678, "calib/mu_c": 0.5949820895522389, "calib/mu_w": 0.5846041666666667, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12106299212598423, "calib/std_conf": 0.20903415423195865, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 432.34765625, "completions/mean_terminated_length": 434.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.16746666666666668, "grad_norm": 0.07360821962356567, "learning_rate": 1.1944444444444446e-06, "loss": 0.0128, "num_tokens": 34189978.0, "reward": 1.1952300071716309, "reward_std": 0.28814953565597534, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7027948498725891, "rewards/format_reward_step": 0.9921875, "step": 157 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.6169309630684765e-08, "aux_brier/mean_r": 0.7900642603635788, "aux_brier/n_active_tok": 215.375, "aux_brier/n_step_records": 53.84375, "aux_brier/std_r": 0.20204701973125339, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.395147868390731, "calib/avg_num_step_conf": 7.2421875, "calib/ece": 0.25689288537549415, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.08300395256916997, "calib/gap": -0.06423064268339507, "calib/mean_conf": 0.6048462450592885, "calib/mu_c": 0.5776815068493151, "calib/mu_w": 0.6419121495327101, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1423320158102767, "calib/std_conf": 0.2096806007803486, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 457.0625, "completions/mean_terminated_length": 458.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.16853333333333334, "grad_norm": 0.030399853363633156, "learning_rate": 1.1666666666666668e-06, "loss": 0.026, "num_tokens": 34412226.0, "reward": 1.2299730777740479, "reward_std": 0.31046023964881897, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6698920130729675, "rewards/format_reward_step": 0.984375, "step": 158 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.161260805523597e-09, "aux_brier/mean_r": 0.8144844230264425, "aux_brier/n_active_tok": 200.875, "aux_brier/n_step_records": 50.21875, "aux_brier/std_r": 0.18931242334656417, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5227889478228691, "calib/avg_num_step_conf": 6.46484375, "calib/ece": 0.15896627450980394, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.054901960784313725, "calib/gap": 0.015614999383249062, "calib/mean_conf": 0.532327843137255, "calib/mu_c": 0.5397373134328359, "calib/mu_w": 0.5241223140495869, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08290196078431374, "calib/std_conf": 0.21394798444549348, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2738.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 418.04296875, "completions/mean_terminated_length": 418.04296875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.1696, "grad_norm": 0.016105517745018005, "learning_rate": 1.138888888888889e-06, "loss": 0.0391, "num_tokens": 34624029.0, "reward": 1.1962106227874756, "reward_std": 0.2835184335708618, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7067175507545471, "rewards/format_reward_step": 0.9921875, "step": 159 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.0736986780511835e-08, "aux_brier/mean_r": 0.7846236266195774, "aux_brier/n_active_tok": 218.75, "aux_brier/n_step_records": 54.6875, "aux_brier/std_r": 0.19788970216177404, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5085429369918699, "calib/avg_num_step_conf": 7.390625, "calib/ece": 0.17402191235059758, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05179282868525897, "calib/gap": 0.010204363567073216, "calib/mean_conf": 0.5824721115537849, "calib/mu_c": 0.5874726562500001, "calib/mu_w": 0.5772682926829269, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12326693227091633, "calib/std_conf": 0.21788352461333965, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 445.61328125, "completions/mean_terminated_length": 449.1220397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.17066666666666666, "grad_norm": 0.070772185921669, "learning_rate": 1.111111111111111e-06, "loss": 0.0123, "num_tokens": 34842946.0, "reward": 1.166327714920044, "reward_std": 0.3040813207626343, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6887481212615967, "rewards/format_reward_step": 0.98046875, "step": 160 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.201992906740283e-08, "aux_brier/mean_r": 0.800326943397522, "aux_brier/n_active_tok": 197.875, "aux_brier/n_step_records": 49.46875, "aux_brier/std_r": 0.18986242730170488, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5327109673625404, "calib/avg_num_step_conf": 6.33203125, "calib/ece": 0.18333529411764704, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.029023228462216943, "calib/mean_conf": 0.568978431372549, "calib/mu_c": 0.5776284916201118, "calib/mu_w": 0.5486052631578948, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02517647058823529, "calib/std_conf": 0.20709775234171518, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 413.76953125, "completions/mean_terminated_length": 415.3921813964844, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.17173333333333332, "grad_norm": 0.1003478467464447, "learning_rate": 1.0833333333333335e-06, "loss": 0.0251, "num_tokens": 35052791.0, "reward": 1.3792005777359009, "reward_std": 0.24497488141059875, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7355523109436035, "rewards/format_reward_step": 0.9921875, "step": 161 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.6508984868334196e-09, "aux_brier/mean_r": 0.7857399228960276, "aux_brier/n_active_tok": 209.0, "aux_brier/n_step_records": 52.25, "aux_brier/std_r": 0.20222072442993522, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4963493959909731, "calib/avg_num_step_conf": 6.61328125, "calib/ece": 0.16089294117647057, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.06666666666666667, "calib/gap": 0.0019580446037434918, "calib/mean_conf": 0.5973815686274511, "calib/mu_c": 0.5980956790123457, "calib/mu_w": 0.5961376344086022, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.06149019607843135, "calib/std_conf": 0.20633723836184856, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 407.96484375, "completions/mean_terminated_length": 409.5647277832031, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.1728, "grad_norm": 0.02969902940094471, "learning_rate": 1.0555555555555557e-06, "loss": -0.0291, "num_tokens": 35261374.0, "reward": 1.3031649589538574, "reward_std": 0.3088793158531189, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7126595377922058, "rewards/format_reward_step": 0.984375, "step": 162 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.6231071363624903e-08, "aux_brier/mean_r": 0.7842877190560102, "aux_brier/n_active_tok": 216.75, "aux_brier/n_step_records": 54.1875, "aux_brier/std_r": 0.19400051701813936, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4706771653543307, "calib/avg_num_step_conf": 6.93359375, "calib/ece": 0.17219166666666663, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.09523809523809523, "calib/gap": -0.023430834645669307, "calib/mean_conf": 0.6063003968253968, "calib/mu_c": 0.5944919999999999, "calib/mu_w": 0.6179228346456692, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.14123015873015868, "calib/std_conf": 0.20429980935629172, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2313.0, "completions/max_terminated_length": 2313.0, "completions/mean_length": 458.52734375, "completions/mean_terminated_length": 460.3255310058594, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.17386666666666667, "grad_norm": 0.03033628687262535, "learning_rate": 1.0277777777777777e-06, "loss": 0.0218, "num_tokens": 35483589.0, "reward": 1.1464180946350098, "reward_std": 0.3199278712272644, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.67160964012146, "rewards/format_reward_step": 0.98046875, "step": 163 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.783250752618315e-08, "aux_brier/mean_r": 0.7870215680450201, "aux_brier/n_active_tok": 228.625, "aux_brier/n_step_records": 57.15625, "aux_brier/std_r": 0.1935525459703058, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5013118440779609, "calib/avg_num_step_conf": 7.4453125, "calib/ece": 0.14960866141732285, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.0026458020989506004, "calib/mean_conf": 0.5933047244094489, "calib/mu_c": 0.5945130434782608, "calib/mu_w": 0.5918672413793102, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09980314960629923, "calib/std_conf": 0.20247135399922225, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 497.72265625, "completions/mean_terminated_length": 499.6745300292969, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.17493333333333333, "grad_norm": 0.11999011784791946, "learning_rate": 1.0000000000000002e-06, "loss": 0.0088, "num_tokens": 35717142.0, "reward": 1.208479642868042, "reward_std": 0.28109174966812134, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7011061906814575, "rewards/format_reward_step": 0.98828125, "step": 164 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.5884240820173154e-08, "aux_brier/mean_r": 0.7734075896441936, "aux_brier/n_active_tok": 228.875, "aux_brier/n_step_records": 57.21875, "aux_brier/std_r": 0.21174128586426377, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48524795982423097, "calib/avg_num_step_conf": 7.46875, "calib/ece": 0.23406916996047436, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1225296442687747, "calib/gap": -0.004246170747018163, "calib/mean_conf": 0.6206343873517788, "calib/mu_c": 0.6183686440677968, "calib/mu_w": 0.622614814814815, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19415019762845856, "calib/std_conf": 0.22241601702167657, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2555.0, "completions/max_terminated_length": 2555.0, "completions/mean_length": 465.515625, "completions/mean_terminated_length": 469.18109130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.176, "grad_norm": 0.1529640108346939, "learning_rate": 9.722222222222224e-07, "loss": 0.0211, "num_tokens": 35941890.0, "reward": 1.1220381259918213, "reward_std": 0.26641708612442017, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6678400039672852, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.7232448110713356e-08, "aux_brier/mean_r": 0.7676201425492764, "aux_brier/n_active_tok": 224.0, "aux_brier/n_step_records": 56.0, "aux_brier/std_r": 0.21411156747490168, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5302083333333334, "calib/avg_num_step_conf": 7.3984375, "calib/ece": 0.14111832669322705, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.11155378486055777, "calib/gap": 0.013518366935483694, "calib/mean_conf": 0.6099573705179282, "calib/mu_c": 0.6151277419354838, "calib/mu_w": 0.6016093750000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06677290836653382, "calib/std_conf": 0.2152603485661002, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2448.0, "completions/max_terminated_length": 2448.0, "completions/mean_length": 520.79296875, "completions/mean_terminated_length": 522.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.17706666666666668, "grad_norm": 0.06923183798789978, "learning_rate": 9.444444444444445e-07, "loss": 0.0105, "num_tokens": 36181397.0, "reward": 1.270603060722351, "reward_std": 0.2961066961288452, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7074119448661804, "rewards/format_reward_step": 0.9765625, "step": 166 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.094241307095459e-09, "aux_brier/mean_r": 0.7531142849475145, "aux_brier/n_active_tok": 224.375, "aux_brier/n_step_records": 56.09375, "aux_brier/std_r": 0.22563402936793864, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5855592105263157, "calib/avg_num_step_conf": 7.4453125, "calib/ece": 0.1635956349206349, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.15079365079365079, "calib/gap": 0.05671534210526319, "calib/mean_conf": 0.610293253968254, "calib/mu_c": 0.6327993421052632, "calib/mu_w": 0.576084, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.08535714285714285, "calib/std_conf": 0.25165342348104297, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 471.265625, "completions/mean_terminated_length": 474.97637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.17813333333333334, "grad_norm": 0.05178198963403702, "learning_rate": 9.166666666666666e-07, "loss": -0.0163, "num_tokens": 36407649.0, "reward": 1.2556668519973755, "reward_std": 0.32814353704452515, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7023549675941467, "rewards/format_reward_step": 0.97265625, "step": 167 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.276514482097319e-08, "aux_brier/mean_r": 0.7753052655607462, "aux_brier/n_active_tok": 220.75, "aux_brier/n_step_records": 55.1875, "aux_brier/std_r": 0.20657177781686187, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4692128316632004, "calib/avg_num_step_conf": 7.04296875, "calib/ece": 0.2539781746031746, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.09523809523809523, "calib/gap": -0.01952363395726997, "calib/mean_conf": 0.6211805555555555, "calib/mu_c": 0.6116511627906976, "calib/mu_w": 0.6311747967479676, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1816269841269841, "calib/std_conf": 0.21944376412643188, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 520.62890625, "completions/mean_terminated_length": 522.6705932617188, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1792, "grad_norm": 0.05993538722395897, "learning_rate": 8.88888888888889e-07, "loss": 0.0317, "num_tokens": 36645602.0, "reward": 1.1635085344314575, "reward_std": 0.375709593296051, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6696591377258301, "rewards/format_reward_step": 0.984375, "step": 168 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.985440191732307e-08, "aux_brier/mean_r": 0.7509642671793699, "aux_brier/n_active_tok": 217.25, "aux_brier/n_step_records": 54.3125, "aux_brier/std_r": 0.22496945364400744, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5504725897920605, "calib/avg_num_step_conf": 7.07421875, "calib/ece": 0.17495652173913037, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07905138339920949, "calib/gap": 0.022868985507246364, "calib/mean_conf": 0.5849644268774704, "calib/mu_c": 0.595359420289855, "calib/mu_w": 0.5724904347826086, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.10723320158102762, "calib/std_conf": 0.23726811804638298, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1772.0, "completions/max_terminated_length": 1772.0, "completions/mean_length": 487.30859375, "completions/mean_terminated_length": 487.30859375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.18026666666666666, "grad_norm": 0.018199941143393517, "learning_rate": 8.611111111111112e-07, "loss": 0.0355, "num_tokens": 36874537.0, "reward": 1.2030644416809082, "reward_std": 0.3347136378288269, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6950702667236328, "rewards/format_reward_step": 0.98046875, "step": 169 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.991255279200416e-08, "aux_brier/mean_r": 0.7418857179582119, "aux_brier/n_active_tok": 230.0, "aux_brier/n_step_records": 57.5, "aux_brier/std_r": 0.2211719653569162, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5159357470678225, "calib/avg_num_step_conf": 7.296875, "calib/ece": 0.2000043307086615, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.14173228346456693, "calib/gap": 0.008210211626721087, "calib/mean_conf": 0.6777122047244095, "calib/mu_c": 0.6811385135135136, "calib/mu_w": 0.6729283018867925, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.14751968503937016, "calib/std_conf": 0.20372021759209152, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 461.44921875, "completions/mean_terminated_length": 463.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.18133333333333335, "grad_norm": 0.11392112821340561, "learning_rate": 8.333333333333333e-07, "loss": -0.0117, "num_tokens": 37096820.0, "reward": 1.2417938709259033, "reward_std": 0.3122642934322357, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6937382221221924, "rewards/format_reward_step": 0.98046875, "step": 170 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.966358712112708e-08, "aux_brier/mean_r": 0.7842296324670315, "aux_brier/n_active_tok": 209.5, "aux_brier/n_step_records": 52.375, "aux_brier/std_r": 0.20828879345208406, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5413246268656716, "calib/avg_num_step_conf": 6.83984375, "calib/ece": 0.1887838582677165, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.06299212598425197, "calib/gap": 0.04228944029850745, "calib/mean_conf": 0.5617673228346456, "calib/mu_c": 0.5840775, "calib/mu_w": 0.5417880597014926, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1390551181102362, "calib/std_conf": 0.22709650987706656, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2554.0, "completions/max_terminated_length": 2554.0, "completions/mean_length": 453.734375, "completions/mean_terminated_length": 455.5137634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.1824, "grad_norm": 0.013566054403781891, "learning_rate": 8.055555555555557e-07, "loss": 0.0654, "num_tokens": 37319872.0, "reward": 1.1388390064239502, "reward_std": 0.37724727392196655, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7037935256958008, "rewards/format_reward_step": 0.98828125, "step": 171 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.25886247030061e-08, "aux_brier/mean_r": 0.778722807765007, "aux_brier/n_active_tok": 213.25, "aux_brier/n_step_records": 53.3125, "aux_brier/std_r": 0.1989540159702301, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4723503780597206, "calib/avg_num_step_conf": 6.95703125, "calib/ece": 0.19872078431372553, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09019607843137255, "calib/gap": -0.02283692810457516, "calib/mean_conf": 0.6222203921568628, "calib/mu_c": 0.6130856209150326, "calib/mu_w": 0.6359225490196078, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11047058823529413, "calib/std_conf": 0.2067231369461721, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2433.0, "completions/max_terminated_length": 2433.0, "completions/mean_length": 448.63671875, "completions/mean_terminated_length": 448.63671875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.18346666666666667, "grad_norm": 0.013927267864346504, "learning_rate": 7.777777777777779e-07, "loss": 0.0018, "num_tokens": 37538075.0, "reward": 1.2694181203842163, "reward_std": 0.33193492889404297, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7026721239089966, "rewards/format_reward_step": 0.9921875, "step": 172 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.045277327002772e-08, "aux_brier/mean_r": 0.7391694243997335, "aux_brier/n_active_tok": 234.0, "aux_brier/n_step_records": 58.5, "aux_brier/std_r": 0.23577190446667373, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5733346193415638, "calib/avg_num_step_conf": 7.71484375, "calib/ece": 0.17457857142857142, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.15873015873015872, "calib/gap": 0.0566409722222222, "calib/mean_conf": 0.6194690476190476, "calib/mu_c": 0.6437437500000001, "calib/mu_w": 0.5871027777777779, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11130952380952379, "calib/std_conf": 0.23711986854294417, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2601.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 495.359375, "completions/mean_terminated_length": 497.302001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.18453333333333333, "grad_norm": 0.04449158161878586, "learning_rate": 7.5e-07, "loss": -0.0038, "num_tokens": 37768047.0, "reward": 1.2300063371658325, "reward_std": 0.319230318069458, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7090877294540405, "rewards/format_reward_step": 0.98046875, "step": 173 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.6862715496456246e-08, "aux_brier/mean_r": 0.7858659084886312, "aux_brier/n_active_tok": 226.875, "aux_brier/n_step_records": 56.71875, "aux_brier/std_r": 0.19160686992108822, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5840610687022901, "calib/avg_num_step_conf": 7.35546875, "calib/ece": 0.17277539062500002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05859375, "calib/gap": 0.05416923969465637, "calib/mean_conf": 0.607458984375, "calib/mu_c": 0.6351783999999999, "calib/mu_w": 0.5810091603053436, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14597656250000002, "calib/std_conf": 0.20693023619968157, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1911.0, "completions/max_terminated_length": 1911.0, "completions/mean_length": 490.5234375, "completions/mean_terminated_length": 492.44708251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.1856, "grad_norm": 0.02974775806069374, "learning_rate": 7.222222222222222e-07, "loss": 0.0056, "num_tokens": 37997853.0, "reward": 1.162726640701294, "reward_std": 0.3572935461997986, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7134063243865967, "rewards/format_reward_step": 0.9921875, "step": 174 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.6371787203706134e-08, "aux_brier/mean_r": 0.7583804409950972, "aux_brier/n_active_tok": 234.0, "aux_brier/n_step_records": 58.5, "aux_brier/std_r": 0.22469458472914994, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4853681774874098, "calib/avg_num_step_conf": 7.6796875, "calib/ece": 0.2699258964143427, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.10756972111553785, "calib/gap": 0.008985626786443435, "calib/mean_conf": 0.5952533864541832, "calib/mu_c": 0.6009096774193549, "calib/mu_w": 0.5919240506329114, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24733067729083671, "calib/std_conf": 0.24697494368307288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2452.0, "completions/max_terminated_length": 2452.0, "completions/mean_length": 492.9609375, "completions/mean_terminated_length": 498.80633544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.18666666666666668, "grad_norm": 0.3764019310474396, "learning_rate": 6.944444444444446e-07, "loss": -0.0021, "num_tokens": 38229875.0, "reward": 1.0151591300964355, "reward_std": 0.31759583950042725, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6465744972229004, "rewards/format_reward_step": 0.98046875, "step": 175 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.330966579286667e-08, "aux_brier/mean_r": 0.7834791298955679, "aux_brier/n_active_tok": 223.875, "aux_brier/n_step_records": 55.96875, "aux_brier/std_r": 0.19350098632276058, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5318801613212983, "calib/avg_num_step_conf": 7.0703125, "calib/ece": 0.16035680000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.072, "calib/gap": 0.01819047436143617, "calib/mean_conf": 0.6228432, "calib/mu_c": 0.6317929133858265, "calib/mu_w": 0.6136024390243904, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13760000000000006, "calib/std_conf": 0.18823001815268467, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2496.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 476.21484375, "completions/mean_terminated_length": 479.9645690917969, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.18773333333333334, "grad_norm": 0.04411868751049042, "learning_rate": 6.666666666666667e-07, "loss": 0.0043, "num_tokens": 38455850.0, "reward": 1.1590666770935059, "reward_std": 0.3019905388355255, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6909544467926025, "rewards/format_reward_step": 0.97265625, "step": 176 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.4630474715348072e-08, "aux_brier/mean_r": 0.7609710041433573, "aux_brier/n_active_tok": 229.375, "aux_brier/n_step_records": 57.34375, "aux_brier/std_r": 0.21466213348321617, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5135368663594471, "calib/avg_num_step_conf": 7.90234375, "calib/ece": 0.17280240000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.128, "calib/gap": 0.018543945212493473, "calib/mean_conf": 0.6188776, "calib/mu_c": 0.6280753968253968, "calib/mu_w": 0.6095314516129033, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14384000000000002, "calib/std_conf": 0.23478227799014134, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2973.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 484.671875, "completions/mean_terminated_length": 492.3651123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.1888, "grad_norm": 0.10568588227033615, "learning_rate": 6.388888888888889e-07, "loss": 0.0012, "num_tokens": 38683758.0, "reward": 1.1491620540618896, "reward_std": 0.34768766164779663, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6747732758522034, "rewards/format_reward_step": 0.9765625, "step": 177 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.234977790147255e-08, "aux_brier/mean_r": 0.7877587005496025, "aux_brier/n_active_tok": 218.0, "aux_brier/n_step_records": 54.5, "aux_brier/std_r": 0.1997723402455449, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5308825429642073, "calib/avg_num_step_conf": 7.171875, "calib/ece": 0.1564474103585657, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.09561752988047809, "calib/gap": 0.01923988241374852, "calib/mean_conf": 0.613273705179283, "calib/mu_c": 0.6216288732394366, "calib/mu_w": 0.6023889908256881, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10199203187250996, "calib/std_conf": 0.21049897811312543, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3061.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 473.12890625, "completions/mean_terminated_length": 476.8543395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.18986666666666666, "grad_norm": 0.03318691626191139, "learning_rate": 6.111111111111112e-07, "loss": 0.0027, "num_tokens": 38910951.0, "reward": 1.2207213640213013, "reward_std": 0.37348127365112305, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7031975984573364, "rewards/format_reward_step": 0.98046875, "step": 178 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.3590677550311412e-08, "aux_brier/mean_r": 0.7528609000146389, "aux_brier/n_active_tok": 234.875, "aux_brier/n_step_records": 58.71875, "aux_brier/std_r": 0.21921817678958178, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5900310117586252, "calib/avg_num_step_conf": 7.60546875, "calib/ece": 0.14260239043824702, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.11155378486055777, "calib/gap": 0.0482013503036568, "calib/mean_conf": 0.6175968127490039, "calib/mu_c": 0.6385288732394366, "calib/mu_w": 0.5903275229357798, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09723107569721118, "calib/std_conf": 0.2299492597543088, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1997.0, "completions/max_terminated_length": 1997.0, "completions/mean_length": 471.09375, "completions/mean_terminated_length": 476.67987060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.19093333333333334, "grad_norm": 0.07718180119991302, "learning_rate": 5.833333333333334e-07, "loss": -0.0319, "num_tokens": 39137815.0, "reward": 1.2231450080871582, "reward_std": 0.3644871115684509, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7050795555114746, "rewards/format_reward_step": 0.9765625, "step": 179 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.4701016515239473e-09, "aux_brier/mean_r": 0.7343279607594013, "aux_brier/n_active_tok": 247.25, "aux_brier/n_step_records": 61.8125, "aux_brier/std_r": 0.23598173074424267, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5294979612640162, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.2123094861660079, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.18181818181818182, "calib/gap": 0.015871839959225253, "calib/mean_conf": 0.6691924901185771, "calib/mu_c": 0.6760305555555556, "calib/mu_w": 0.6601587155963303, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15616600790513835, "calib/std_conf": 0.23946849360566766, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 541.54296875, "completions/mean_terminated_length": 545.8070678710938, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.192, "grad_norm": 0.13195045292377472, "learning_rate": 5.555555555555555e-07, "loss": 0.0467, "num_tokens": 39380306.0, "reward": 1.2197463512420654, "reward_std": 0.3591662645339966, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6758601665496826, "rewards/format_reward_step": 0.9765625, "step": 180 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.2559889331641436e-08, "aux_brier/mean_r": 0.7783240135759115, "aux_brier/n_active_tok": 219.5, "aux_brier/n_step_records": 54.875, "aux_brier/std_r": 0.19999481202103198, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.524643928035982, "calib/avg_num_step_conf": 7.24609375, "calib/ece": 0.21105984251968501, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08267716535433071, "calib/gap": 0.0017970639680159328, "calib/mean_conf": 0.6065779527559055, "calib/mu_c": 0.6075543103448277, "calib/mu_w": 0.6057572463768117, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18047244094488185, "calib/std_conf": 0.1986199710496047, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2662.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 449.2890625, "completions/mean_terminated_length": 451.0509948730469, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.19306666666666666, "grad_norm": 0.06607302278280258, "learning_rate": 5.277777777777779e-07, "loss": 0.0726, "num_tokens": 39601588.0, "reward": 1.1180652379989624, "reward_std": 0.31925728917121887, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6831987500190735, "rewards/format_reward_step": 0.98828125, "step": 181 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.7544570075961374e-09, "aux_brier/mean_r": 0.7577127385884523, "aux_brier/n_active_tok": 231.125, "aux_brier/n_step_records": 57.78125, "aux_brier/std_r": 0.22663994459435344, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5823636136136136, "calib/avg_num_step_conf": 7.43359375, "calib/ece": 0.14662000000000003, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.15294117647058825, "calib/gap": 0.06783564189189206, "calib/mean_conf": 0.633615294117647, "calib/mu_c": 0.6631437500000001, "calib/mu_w": 0.5953081081081081, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10776470588235296, "calib/std_conf": 0.22659795086169118, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 497.4765625, "completions/mean_terminated_length": 497.4765625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.19413333333333332, "grad_norm": 0.009706188924610615, "learning_rate": 5.000000000000001e-07, "loss": 0.0083, "num_tokens": 39835102.0, "reward": 1.2398512363433838, "reward_std": 0.2927827835083008, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7250296473503113, "rewards/format_reward_step": 0.9921875, "step": 182 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.149970170430066e-09, "aux_brier/mean_r": 0.7672966662794352, "aux_brier/n_active_tok": 216.75, "aux_brier/n_step_records": 54.1875, "aux_brier/std_r": 0.20986755122430623, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5678553427419355, "calib/avg_num_step_conf": 6.921875, "calib/ece": 0.14822738095238097, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": 0.05744493447580645, "calib/mean_conf": 0.5914154761904762, "calib/mu_c": 0.61968203125, "calib/mu_w": 0.5622370967741935, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1158531746031746, "calib/std_conf": 0.22001805093373136, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 514.3203125, "completions/mean_terminated_length": 514.3203125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.1952, "grad_norm": 0.011388537473976612, "learning_rate": 4.7222222222222226e-07, "loss": 0.0331, "num_tokens": 40073448.0, "reward": 1.1702120304107666, "reward_std": 0.3779938220977783, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7120984196662903, "rewards/format_reward_step": 0.984375, "step": 183 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.084384265882422e-08, "aux_brier/mean_r": 0.7429876867681742, "aux_brier/n_active_tok": 245.5, "aux_brier/n_step_records": 61.375, "aux_brier/std_r": 0.21635166136547923, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.473652273888605, "calib/avg_num_step_conf": 7.9609375, "calib/ece": 0.2161431372549021, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.13333333333333333, "calib/gap": -0.03624070643842614, "calib/mean_conf": 0.6745627450980393, "calib/mu_c": 0.6599243421052632, "calib/mu_w": 0.6961650485436893, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1473137254901962, "calib/std_conf": 0.20710437205862595, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 493.31640625, "completions/mean_terminated_length": 495.2510070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.19626666666666667, "grad_norm": 0.09844480454921722, "learning_rate": 4.444444444444445e-07, "loss": 0.0101, "num_tokens": 40305017.0, "reward": 1.262253999710083, "reward_std": 0.2725922167301178, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6896409392356873, "rewards/format_reward_step": 0.9921875, "step": 184 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.79858214064555e-10, "aux_brier/mean_r": 0.7613634951412678, "aux_brier/n_active_tok": 248.5, "aux_brier/n_step_records": 62.125, "aux_brier/std_r": 0.21437946753576398, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5550709939148073, "calib/avg_num_step_conf": 8.296875, "calib/ece": 0.13101943319838055, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.08502024291497975, "calib/gap": 0.036053191345503666, "calib/mean_conf": 0.6258226720647774, "calib/mu_c": 0.6407110344827586, "calib/mu_w": 0.6046578431372549, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08489878542510121, "calib/std_conf": 0.22071358189963147, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 497.671875, "completions/mean_terminated_length": 505.57147216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.19733333333333333, "grad_norm": 0.15276029706001282, "learning_rate": 4.1666666666666667e-07, "loss": -0.0357, "num_tokens": 40539341.0, "reward": 1.225562572479248, "reward_std": 0.30816981196403503, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6991252899169922, "rewards/format_reward_step": 0.9609375, "step": 185 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.979132911866248e-08, "aux_brier/mean_r": 0.7636740412563086, "aux_brier/n_active_tok": 235.5, "aux_brier/n_step_records": 58.875, "aux_brier/std_r": 0.22241042833775282, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47173006774361653, "calib/avg_num_step_conf": 7.6875, "calib/ece": 0.22383715415019764, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.11857707509881422, "calib/gap": -0.020000729546638785, "calib/mean_conf": 0.6126055335968379, "calib/mu_c": 0.604621052631579, "calib/mu_w": 0.6246217821782177, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11782608695652177, "calib/std_conf": 0.24029222272348957, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 977.0, "completions/max_terminated_length": 977.0, "completions/mean_length": 469.46875, "completions/mean_terminated_length": 475.03558349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.1984, "grad_norm": 0.0938546285033226, "learning_rate": 3.8888888888888895e-07, "loss": -0.0339, "num_tokens": 40764565.0, "reward": 1.253624677658081, "reward_std": 0.33846354484558105, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6785610914230347, "rewards/format_reward_step": 0.98046875, "step": 186 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.123711770318006e-08, "aux_brier/mean_r": 0.7306062150746584, "aux_brier/n_active_tok": 256.375, "aux_brier/n_step_records": 64.09375, "aux_brier/std_r": 0.23782278783619404, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5809750996666885, "calib/avg_num_step_conf": 9.01953125, "calib/ece": 0.14691719999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.132, "calib/gap": 0.06617900137245936, "calib/mean_conf": 0.6267628, "calib/mu_c": 0.6550874125874127, "calib/mu_w": 0.5889084112149533, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10083999999999996, "calib/std_conf": 0.24310063993367026, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 539.796875, "completions/mean_terminated_length": 546.1976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.19946666666666665, "grad_norm": 0.08663739264011383, "learning_rate": 3.611111111111111e-07, "loss": 0.0235, "num_tokens": 41004297.0, "reward": 1.223996639251709, "reward_std": 0.38673633337020874, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.708486795425415, "rewards/format_reward_step": 0.9765625, "step": 187 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.242779945620498e-08, "aux_brier/mean_r": 0.7603856101632118, "aux_brier/n_active_tok": 237.75, "aux_brier/n_step_records": 59.4375, "aux_brier/std_r": 0.21150804916396737, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48428498271668163, "calib/avg_num_step_conf": 7.546875, "calib/ece": 0.19895256916996046, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.11462450592885376, "calib/gap": -0.013389706823710101, "calib/mean_conf": 0.6331422924901186, "calib/mu_c": 0.6274794520547945, "calib/mu_w": 0.6408691588785046, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1275098814229249, "calib/std_conf": 0.2146966045511938, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 504.74609375, "completions/mean_terminated_length": 508.720458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.20053333333333334, "grad_norm": 0.044322092086076736, "learning_rate": 3.3333333333333335e-07, "loss": 0.0025, "num_tokens": 41237584.0, "reward": 1.234865427017212, "reward_std": 0.3233446776866913, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6894615888595581, "rewards/format_reward_step": 0.984375, "step": 188 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.70022790688418e-08, "aux_brier/mean_r": 0.7549281157553196, "aux_brier/n_active_tok": 226.75, "aux_brier/n_step_records": 56.6875, "aux_brier/std_r": 0.21891773724928498, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4905071355759429, "calib/avg_num_step_conf": 7.30078125, "calib/ece": 0.18193083003952562, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.11067193675889328, "calib/gap": -0.018132186544342677, "calib/mean_conf": 0.640203557312253, "calib/mu_c": 0.6323916666666666, "calib/mu_w": 0.6505238532110093, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1264822134387351, "calib/std_conf": 0.2101019621357078, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2203.0, "completions/max_terminated_length": 2203.0, "completions/mean_length": 469.953125, "completions/mean_terminated_length": 471.7961120605469, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.2016, "grad_norm": 0.07346808165311813, "learning_rate": 3.055555555555556e-07, "loss": 0.0019, "num_tokens": 41465660.0, "reward": 1.2287752628326416, "reward_std": 0.2613146901130676, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6885387897491455, "rewards/format_reward_step": 0.98828125, "step": 189 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9795711431180862e-08, "aux_brier/mean_r": 0.7416869290173054, "aux_brier/n_active_tok": 245.75, "aux_brier/n_step_records": 61.4375, "aux_brier/std_r": 0.23273182520642877, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5646343017329255, "calib/avg_num_step_conf": 8.0703125, "calib/ece": 0.17744229249011856, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.14624505928853754, "calib/gap": 0.05935076452599397, "calib/mean_conf": 0.6187632411067193, "calib/mu_c": 0.6443333333333333, "calib/mu_w": 0.5849825688073393, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1135177865612648, "calib/std_conf": 0.24990314841853278, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1615.0, "completions/max_terminated_length": 1615.0, "completions/mean_length": 527.42578125, "completions/mean_terminated_length": 531.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.20266666666666666, "grad_norm": 0.08770303428173065, "learning_rate": 2.7777777777777776e-07, "loss": -0.0053, "num_tokens": 41706289.0, "reward": 1.232055425643921, "reward_std": 0.33430615067481995, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7094713449478149, "rewards/format_reward_step": 0.984375, "step": 190 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.038965662976011e-08, "aux_brier/mean_r": 0.7504640500992537, "aux_brier/n_active_tok": 246.125, "aux_brier/n_step_records": 61.53125, "aux_brier/std_r": 0.2225411895196885, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.590440031152648, "calib/avg_num_step_conf": 7.953125, "calib/ece": 0.271511155378486, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.1394422310756972, "calib/gap": 0.050895723001038395, "calib/mean_conf": 0.6472139442231075, "calib/mu_c": 0.6764130841121495, "calib/mu_w": 0.6255173611111111, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24621513944223108, "calib/std_conf": 0.22694470939041445, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 495.890625, "completions/mean_terminated_length": 497.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.20373333333333332, "grad_norm": 0.026141246780753136, "learning_rate": 2.5000000000000004e-07, "loss": 0.0192, "num_tokens": 41937405.0, "reward": 1.077864646911621, "reward_std": 0.2893039584159851, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6630213260650635, "rewards/format_reward_step": 0.97265625, "step": 191 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.107425272270042e-09, "aux_brier/mean_r": 0.7753161191940308, "aux_brier/n_active_tok": 225.625, "aux_brier/n_step_records": 56.40625, "aux_brier/std_r": 0.20858300244435668, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5480855423229607, "calib/avg_num_step_conf": 7.4140625, "calib/ece": 0.17907091633466135, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0796812749003984, "calib/gap": 0.04229238698937132, "calib/mean_conf": 0.5884988047808766, "calib/mu_c": 0.607707299270073, "calib/mu_w": 0.5654149122807017, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.11087649402390437, "calib/std_conf": 0.23393689404832627, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 510.96875, "completions/mean_terminated_length": 512.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.2048, "grad_norm": 0.03855369985103607, "learning_rate": 2.2222222222222224e-07, "loss": 0.0562, "num_tokens": 42173189.0, "reward": 1.1960439682006836, "reward_std": 0.28442227840423584, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6982383728027344, "rewards/format_reward_step": 0.97265625, "step": 192 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.864155766197808e-09, "aux_brier/mean_r": 0.7524569313973188, "aux_brier/n_active_tok": 247.75, "aux_brier/n_step_records": 61.9375, "aux_brier/std_r": 0.20505563402548432, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4885655533399801, "calib/avg_num_step_conf": 8.11328125, "calib/ece": 0.252634251968504, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.12598425196850394, "calib/gap": 0.0020106306081757452, "calib/mean_conf": 0.6454759842519685, "calib/mu_c": 0.6465525423728815, "calib/mu_w": 0.6445419117647058, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21677165354330713, "calib/std_conf": 0.21947669942068607, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 536.2578125, "completions/mean_terminated_length": 536.2578125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.20586666666666667, "grad_norm": 0.03138832002878189, "learning_rate": 1.9444444444444447e-07, "loss": 0.0711, "num_tokens": 42416183.0, "reward": 1.1206305027008057, "reward_std": 0.3979037404060364, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6622095108032227, "rewards/format_reward_step": 0.98828125, "step": 193 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.144103057599711e-08, "aux_brier/mean_r": 0.7848739847540855, "aux_brier/n_active_tok": 226.875, "aux_brier/n_step_records": 56.71875, "aux_brier/std_r": 0.19625219888985157, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48969371930731453, "calib/avg_num_step_conf": 7.30078125, "calib/ece": 0.15639246031746032, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06349206349206349, "calib/gap": -0.031080860687516254, "calib/mean_conf": 0.6051154761904762, "calib/mu_c": 0.5920417808219177, "calib/mu_w": 0.623122641509434, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09107142857142857, "calib/std_conf": 0.2128361685462316, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2641.0, "completions/max_terminated_length": 2641.0, "completions/mean_length": 473.24609375, "completions/mean_terminated_length": 476.9724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.20693333333333333, "grad_norm": 0.0721108615398407, "learning_rate": 1.6666666666666668e-07, "loss": 0.0491, "num_tokens": 42643278.0, "reward": 1.2311301231384277, "reward_std": 0.324305921792984, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6823331117630005, "rewards/format_reward_step": 0.98046875, "step": 194 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.1848507131966288e-08, "aux_brier/mean_r": 0.7506272029131651, "aux_brier/n_active_tok": 242.625, "aux_brier/n_step_records": 60.65625, "aux_brier/std_r": 0.22388874553143978, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5168831168831167, "calib/avg_num_step_conf": 7.890625, "calib/ece": 0.20170120000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.156, "calib/gap": 0.005981623376623224, "calib/mean_conf": 0.6594588, "calib/mu_c": 0.6620907142857142, "calib/mu_w": 0.656109090909091, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.15058000000000002, "calib/std_conf": 0.22140749206510604, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2367.0, "completions/max_terminated_length": 2367.0, "completions/mean_length": 499.07421875, "completions/mean_terminated_length": 501.0314025878906, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.208, "grad_norm": 0.02382788434624672, "learning_rate": 1.3888888888888888e-07, "loss": 0.0274, "num_tokens": 42877025.0, "reward": 1.2027819156646729, "reward_std": 0.31919705867767334, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6783152222633362, "rewards/format_reward_step": 0.97265625, "step": 195 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1521586858187494e-08, "aux_brier/mean_r": 0.7673561163246632, "aux_brier/n_active_tok": 226.125, "aux_brier/n_step_records": 56.53125, "aux_brier/std_r": 0.21260583796538413, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4740475742170338, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.18918661417322838, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08267716535433071, "calib/gap": -0.02016176489047894, "calib/mean_conf": 0.6183724409448819, "calib/mu_c": 0.6094028368794326, "calib/mu_w": 0.6295646017699116, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12622047244094492, "calib/std_conf": 0.21736828165677655, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1134.0, "completions/max_terminated_length": 1134.0, "completions/mean_length": 425.83203125, "completions/mean_terminated_length": 429.1850280761719, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.20906666666666668, "grad_norm": 0.06819652765989304, "learning_rate": 1.1111111111111112e-07, "loss": 0.0082, "num_tokens": 43088582.0, "reward": 1.2184810638427734, "reward_std": 0.2531457841396332, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6864246726036072, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.148380794241849e-08, "aux_brier/mean_r": 0.7538832575082779, "aux_brier/n_active_tok": 242.25, "aux_brier/n_step_records": 60.5625, "aux_brier/std_r": 0.21373565820977092, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49071955250444954, "calib/avg_num_step_conf": 8.04296875, "calib/ece": 0.25523333333333337, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.10714285714285714, "calib/gap": -0.013332112890923065, "calib/mean_conf": 0.6437746031746032, "calib/mu_c": 0.6364736842105263, "calib/mu_w": 0.6498057971014494, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22331349206349205, "calib/std_conf": 0.2132599309868712, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 504.546875, "completions/mean_terminated_length": 506.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.21013333333333334, "grad_norm": 0.06600037962198257, "learning_rate": 8.333333333333334e-08, "loss": 0.0207, "num_tokens": 43322802.0, "reward": 1.1007957458496094, "reward_std": 0.33143335580825806, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6531826853752136, "rewards/format_reward_step": 0.984375, "step": 197 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.852925210631966e-08, "aux_brier/mean_r": 0.7748613730072975, "aux_brier/n_active_tok": 231.875, "aux_brier/n_step_records": 57.96875, "aux_brier/std_r": 0.20690141199156642, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5847468157812985, "calib/avg_num_step_conf": 7.375, "calib/ece": 0.13784882812500004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.04433693072382727, "calib/mean_conf": 0.5851199218750001, "calib/mu_c": 0.6043441379310345, "calib/mu_w": 0.5600072072072072, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07828125000000001, "calib/std_conf": 0.21049331907819877, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 448.53125, "completions/mean_terminated_length": 450.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.2112, "grad_norm": 0.014602994546294212, "learning_rate": 5.555555555555556e-08, "loss": 0.0198, "num_tokens": 43543010.0, "reward": 1.249288558959961, "reward_std": 0.262395441532135, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7315295934677124, "rewards/format_reward_step": 1.0, "step": 198 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.424231964643809e-08, "aux_brier/mean_r": 0.746009947732091, "aux_brier/n_active_tok": 247.375, "aux_brier/n_step_records": 61.84375, "aux_brier/std_r": 0.21845908579416573, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.45856922285126095, "calib/avg_num_step_conf": 8.15625, "calib/ece": 0.22041159999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.128, "calib/gap": -0.02590461914565112, "calib/mean_conf": 0.6251884, "calib/mu_c": 0.613168656716418, "calib/mu_w": 0.6390732758620691, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1548, "calib/std_conf": 0.2238250382451436, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2801.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 556.8984375, "completions/mean_terminated_length": 563.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.21226666666666666, "grad_norm": 0.24404789507389069, "learning_rate": 2.777777777777778e-08, "loss": 0.0246, "num_tokens": 43789776.0, "reward": 1.172433853149414, "reward_std": 0.41650083661079407, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.658484935760498, "rewards/format_reward_step": 0.96875, "step": 199 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.940847843024841e-10, "aux_brier/mean_r": 0.7518376670777798, "aux_brier/n_active_tok": 238.0, "aux_brier/n_step_records": 59.5, "aux_brier/std_r": 0.22355738934129477, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5108237547892721, "calib/avg_num_step_conf": 7.89453125, "calib/ece": 0.17630316205533603, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.11067193675889328, "calib/gap": -0.007147790549169808, "calib/mean_conf": 0.6145664031620554, "calib/mu_c": 0.6115151724137932, "calib/mu_w": 0.618662962962963, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10887351778656132, "calib/std_conf": 0.22629195319484657, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 495.73828125, "completions/mean_terminated_length": 499.6417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.21333333333333335, "grad_norm": 0.11620119959115982, "learning_rate": 0.0, "loss": -0.0097, "num_tokens": 44024733.0, "reward": 1.2305445671081543, "reward_std": 0.2702827453613281, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6878036260604858, "rewards/format_reward_step": 0.984375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.03209023250557948, "train_runtime": 14667.9702, "train_samples_per_second": 3.491, "train_steps_per_second": 0.014 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44024733, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }