{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.0040498161688447, "learning_rate": 2.5000000000000004e-07, "loss": 0.0322, "num_tokens": 264685.0, "reward": 0.055236753076314926, "reward_std": 0.11281141638755798, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.024703249335289, "step": 1 }, { "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006203852593898773, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.11156807839870453, "reward_std": 0.21452845633029938, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "rewards/stepwise_brier_reward": 0.04943438619375229, "step": 2 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.5555555555555556, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.553853846153846, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.09887777777777784, "calib/mean_conf": 0.8615461538461537, "calib/mu_c": 0.9299999999999999, "calib/mu_w": 0.8311222222222221, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.553853846153846, "calib/std_conf": 0.24775902254900398, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2872.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 696.01953125, "completions/mean_terminated_length": 761.457275390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.004721686244010925, "learning_rate": 7.5e-07, "loss": 0.0425, "num_tokens": 816904.0, "reward": 0.05670829862356186, "reward_std": 0.12842696905136108, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.02178749069571495, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.02535819262266159, "step": 3 }, { "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.5375, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.575, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.139, "calib/mean_conf": 0.8607142857142858, "calib/mu_c": 0.96, "calib/mu_w": 0.821, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.575, "calib/std_conf": 0.27101566337744826, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 659.09375, "completions/mean_terminated_length": 724.1544799804688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.004266666666666667, "grad_norm": 0.004437054041773081, "learning_rate": 1.0000000000000002e-06, "loss": 0.0262, "num_tokens": 1091800.0, "reward": 0.05978678539395332, "reward_std": 0.1366736888885498, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.0242253877222538, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.027421751990914345, "step": 4 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 0.25, "calib/avg_num_step_conf": 0.16015625, "calib/ece": 0.7177777777777776, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.07999999999999996, "calib/mean_conf": 0.8288888888888889, "calib/mu_c": 0.9, "calib/mu_w": 0.8200000000000001, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.7177777777777776, "calib/std_conf": 0.3111785641162194, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 713.015625, "completions/mean_terminated_length": 786.77587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.002612976124510169, "learning_rate": 1.25e-06, "loss": 0.0354, "num_tokens": 1381020.0, "reward": 0.022981680929660797, "reward_std": 0.060142092406749725, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006355468649417162, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.015258748084306717, "step": 5 }, { "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.531578947368421, "calib/avg_num_step_conf": 0.375, "calib/ece": 0.7041666666666667, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.05999999999999994, "calib/mean_conf": 0.9125, "calib/mu_c": 0.96, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.7041666666666667, "calib/std_conf": 0.15476460620352875, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 609.15234375, "completions/mean_terminated_length": 655.2227172851562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.003538590855896473, "learning_rate": 1.5e-06, "loss": 0.0299, "num_tokens": 1642915.0, "reward": 0.0765293687582016, "reward_std": 0.13818004727363586, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02597109228372574, "rewards/format_reward_step": 0.0625, "rewards/stepwise_brier_reward": 0.03795885294675827, "step": 6 }, { "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.8125, "calib/avg_num_step_conf": 0.4921875, "calib/ece": 0.8647058823529413, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7058823529411765, "calib/gap": 0.06000000000000005, "calib/mean_conf": 0.9235294117647058, "calib/mu_c": 0.98, "calib/mu_w": 0.9199999999999999, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.8647058823529413, "calib/std_conf": 0.10312420703270675, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 743.33984375, "completions/mean_terminated_length": 823.7879028320312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.004745212383568287, "learning_rate": 1.75e-06, "loss": 0.0266, "num_tokens": 1940634.0, "reward": 0.05298828333616257, "reward_std": 0.09799638390541077, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.011421484872698784, "rewards/format_reward_step": 0.05859375, "rewards/stepwise_brier_reward": 0.03646914288401604, "step": 7 }, { "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.8506493506493507, "calib/avg_num_step_conf": 0.3671875, "calib/ece": 0.4780555555555557, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7222222222222222, "calib/gap": 0.1756493506493506, "calib/mean_conf": 0.8669444444444444, "calib/mu_c": 0.9742857142857143, "calib/mu_w": 0.7986363636363637, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.4780555555555557, "calib/std_conf": 0.24804311278983163, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 641.24609375, "completions/mean_terminated_length": 726.3672485351562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.004859757609665394, "learning_rate": 2.0000000000000003e-06, "loss": 0.0115, "num_tokens": 2211305.0, "reward": 0.092274010181427, "reward_std": 0.18566857278347015, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.03517138585448265, "rewards/format_reward_step": 0.06640625, "rewards/stepwise_brier_reward": 0.03704963997006416, "step": 8 }, { "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.926470588235294, "calib/avg_num_step_conf": 0.328125, "calib/ece": 0.8073684210526315, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.8421052631578947, "calib/gap": 0.09205882352941175, "calib/mean_conf": 0.9126315789473682, "calib/mu_c": 0.995, "calib/mu_w": 0.9029411764705882, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.8073684210526315, "calib/std_conf": 0.19086622775521844, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 589.00390625, "completions/mean_terminated_length": 658.4497680664062, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0096, "grad_norm": 0.007626960054039955, "learning_rate": 2.25e-06, "loss": 0.0175, "num_tokens": 2469626.0, "reward": 0.05566889047622681, "reward_std": 0.12854988873004913, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.015421093441545963, "rewards/format_reward_step": 0.0625, "rewards/stepwise_brier_reward": 0.03537946939468384, "step": 9 }, { "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.49404761904761907, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.7312000000000001, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.08203125, "calib/frac_conf_gt_0.9": 0.84, "calib/gap": 0.06630952380952371, "calib/mean_conf": 0.8768, "calib/mu_c": 0.9325, "calib/mu_w": 0.8661904761904763, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.724, "calib/std_conf": 0.23929429579494785, "calib/step_conf_rate": 0.08984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 666.9453125, "completions/mean_terminated_length": 708.4564819335938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0056964014656841755, "learning_rate": 2.5e-06, "loss": 0.0414, "num_tokens": 2747164.0, "reward": 0.08224216103553772, "reward_std": 0.1923098862171173, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.02598554641008377, "rewards/format_reward_step": 0.08203125, "rewards/stepwise_brier_reward": 0.045170582830905914, "step": 10 }, { "calib/answer_extract_rate": 0.1875, "calib/auroc": 0.453125, "calib/avg_num_step_conf": 0.7421875, "calib/ece": 0.7353657894736841, "calib/final_conf_rate": 0.1484375, "calib/format_rate": 0.1171875, "calib/frac_conf_gt_0.9": 0.7105263157894737, "calib/gap": 0.04404479166666664, "calib/mean_conf": 0.8795763157894736, "calib/mu_c": 0.9166666666666666, "calib/mu_w": 0.872621875, "calib/nonempty_final_conf_rate": 0.1484375, "calib/nonempty_reasoning_rate": 0.23046875, "calib/nonempty_step_conf_rate": 0.171875, "calib/pce": 0.7285236842105263, "calib/std_conf": 0.20181194925006496, "calib/step_conf_rate": 0.171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 707.6015625, "completions/mean_terminated_length": 739.3713989257812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.011733333333333333, "grad_norm": 0.006040748208761215, "learning_rate": 2.7500000000000004e-06, "loss": 0.075, "num_tokens": 3032790.0, "reward": 0.11954192072153091, "reward_std": 0.18748214840888977, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.03433149307966232, "rewards/format_reward_step": 0.1171875, "rewards/stepwise_brier_reward": 0.06883619725704193, "step": 11 }, { "calib/answer_extract_rate": 0.19140625, "calib/auroc": 0.5789473684210527, "calib/avg_num_step_conf": 0.64453125, "calib/ece": 0.5094484848484849, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": 0.05772481203007518, "calib/mean_conf": 0.912478787878788, "calib/mu_c": 0.9457142857142857, "calib/mu_w": 0.8879894736842106, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.23046875, "calib/nonempty_step_conf_rate": 0.15625, "calib/pce": 0.4988424242424243, "calib/std_conf": 0.17290449940265368, "calib/step_conf_rate": 0.15625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 500.68359375, "completions/mean_terminated_length": 572.2098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.007509665098041296, "learning_rate": 3e-06, "loss": 0.065, "num_tokens": 3265141.0, "reward": 0.17071044445037842, "reward_std": 0.330117404460907, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.0605458989739418, "rewards/format_reward_step": 0.11328125, "rewards/stepwise_brier_reward": 0.06760834902524948, "step": 12 }, { "calib/answer_extract_rate": 0.24609375, "calib/auroc": 0.4939271255060728, "calib/avg_num_step_conf": 1.32421875, "calib/ece": 0.6212941176470589, "calib/final_conf_rate": 0.19921875, "calib/format_rate": 0.1640625, "calib/frac_conf_gt_0.9": 0.7058823529411765, "calib/gap": 0.042352226720647757, "calib/mean_conf": 0.8530588235294121, "calib/mu_c": 0.8846153846153846, "calib/mu_w": 0.8422631578947368, "calib/nonempty_final_conf_rate": 0.19921875, "calib/nonempty_reasoning_rate": 0.296875, "calib/nonempty_step_conf_rate": 0.24609375, "calib/pce": 0.6097254901960785, "calib/std_conf": 0.24945976230723188, "calib/step_conf_rate": 0.24609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 682.77734375, "completions/mean_terminated_length": 725.27392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.013866666666666666, "grad_norm": 0.006930575706064701, "learning_rate": 3.2500000000000002e-06, "loss": 0.1169, "num_tokens": 3544524.0, "reward": 0.21188601851463318, "reward_std": 0.38299399614334106, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.06761399656534195, "rewards/format_reward_step": 0.1640625, "rewards/stepwise_brier_reward": 0.10024251788854599, "step": 13 }, { "calib/answer_extract_rate": 0.359375, "calib/auroc": 0.5449236298292902, "calib/avg_num_step_conf": 1.4375, "calib/ece": 0.6217297297297297, "calib/final_conf_rate": 0.2890625, "calib/format_rate": 0.234375, "calib/frac_conf_gt_0.9": 0.7297297297297297, "calib/gap": 0.04615633423180576, "calib/mean_conf": 0.9055135135135135, "calib/mu_c": 0.9385714285714284, "calib/mu_w": 0.8924150943396226, "calib/nonempty_final_conf_rate": 0.2890625, "calib/nonempty_reasoning_rate": 0.40625, "calib/nonempty_step_conf_rate": 0.30859375, "calib/pce": 0.6217297297297297, "calib/std_conf": 0.15345800052618, "calib/step_conf_rate": 0.30859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2908.0, "completions/max_terminated_length": 2908.0, "completions/mean_length": 591.48046875, "completions/mean_terminated_length": 615.5243530273438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933333333333333, "grad_norm": 0.00944006908684969, "learning_rate": 3.5e-06, "loss": 0.1012, "num_tokens": 3801343.0, "reward": 0.2951757311820984, "reward_std": 0.4583866000175476, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.0882023349404335, "rewards/format_reward_step": 0.234375, "rewards/stepwise_brier_reward": 0.13156315684318542, "step": 14 }, { "calib/answer_extract_rate": 0.4296875, "calib/auroc": 0.5465277777777778, "calib/avg_num_step_conf": 1.7265625, "calib/ece": 0.6830434782608696, "calib/final_conf_rate": 0.359375, "calib/format_rate": 0.3203125, "calib/frac_conf_gt_0.9": 0.717391304347826, "calib/gap": 0.012222222222222356, "calib/mean_conf": 0.9004347826086957, "calib/mu_c": 0.9100000000000001, "calib/mu_w": 0.8977777777777778, "calib/nonempty_final_conf_rate": 0.359375, "calib/nonempty_reasoning_rate": 0.46484375, "calib/nonempty_step_conf_rate": 0.37109375, "calib/pce": 0.6830434782608696, "calib/std_conf": 0.16801858750356122, "calib/step_conf_rate": 0.37109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2582.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 517.28515625, "completions/mean_terminated_length": 529.7000122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.012086746282875538, "learning_rate": 3.7500000000000005e-06, "loss": 0.0694, "num_tokens": 4041648.0, "reward": 0.35359853506088257, "reward_std": 0.40532323718070984, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.1023242175579071, "rewards/format_reward_step": 0.3203125, "rewards/stepwise_brier_reward": 0.20269489288330078, "step": 15 }, { "calib/answer_extract_rate": 0.48046875, "calib/auroc": 0.45916666666666667, "calib/avg_num_step_conf": 2.203125, "calib/ece": 0.7230227272727273, "calib/final_conf_rate": 0.4296875, "calib/format_rate": 0.3515625, "calib/frac_conf_gt_0.9": 0.7636363636363637, "calib/gap": 0.005455555555555547, "calib/mean_conf": 0.8880863636363635, "calib/mu_c": 0.89255, "calib/mu_w": 0.8870944444444444, "calib/nonempty_final_conf_rate": 0.4296875, "calib/nonempty_reasoning_rate": 0.578125, "calib/nonempty_step_conf_rate": 0.4765625, "calib/pce": 0.7146454545454546, "calib/std_conf": 0.22062873853737872, "calib/step_conf_rate": 0.4765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 540.2578125, "completions/mean_terminated_length": 559.943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.017066666666666667, "grad_norm": 0.010672827251255512, "learning_rate": 4.000000000000001e-06, "loss": 0.1363, "num_tokens": 4288802.0, "reward": 0.3784298300743103, "reward_std": 0.49684762954711914, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.10257644206285477, "rewards/format_reward_step": 0.3515625, "rewards/stepwise_brier_reward": 0.21583032608032227, "step": 16 }, { "calib/answer_extract_rate": 0.6171875, "calib/auroc": 0.48190045248868774, "calib/avg_num_step_conf": 3.13671875, "calib/ece": 0.6582450331125829, "calib/final_conf_rate": 0.58984375, "calib/format_rate": 0.5, "calib/frac_conf_gt_0.9": 0.7019867549668874, "calib/gap": 0.004967320261437869, "calib/mean_conf": 0.8705629139072847, "calib/mu_c": 0.8744117647058822, "calib/mu_w": 0.8694444444444444, "calib/nonempty_final_conf_rate": 0.58984375, "calib/nonempty_reasoning_rate": 0.70703125, "calib/nonempty_step_conf_rate": 0.62890625, "calib/pce": 0.6518211920529803, "calib/std_conf": 0.2147164640849067, "calib/step_conf_rate": 0.62890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 440.71875, "completions/mean_terminated_length": 451.2960205078125, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.018133333333333335, "grad_norm": 0.009812280535697937, "learning_rate": 4.25e-06, "loss": 0.1454, "num_tokens": 4505154.0, "reward": 0.5809885859489441, "reward_std": 0.6631735563278198, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.19295614957809448, "rewards/format_reward_step": 0.5, "rewards/stepwise_brier_reward": 0.3106856942176819, "step": 17 }, { "calib/answer_extract_rate": 0.65234375, "calib/auroc": 0.5294930875576036, "calib/avg_num_step_conf": 3.41015625, "calib/ece": 0.6736477987421385, "calib/final_conf_rate": 0.62109375, "calib/format_rate": 0.546875, "calib/frac_conf_gt_0.9": 0.7610062893081762, "calib/gap": 0.022638248847925868, "calib/mean_conf": 0.8937735849056603, "calib/mu_c": 0.9114285714285713, "calib/mu_w": 0.8887903225806454, "calib/nonempty_final_conf_rate": 0.62109375, "calib/nonempty_reasoning_rate": 0.734375, "calib/nonempty_step_conf_rate": 0.6640625, "calib/pce": 0.6736477987421385, "calib/std_conf": 0.18813443355326082, "calib/step_conf_rate": 0.6640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2636.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 445.26953125, "completions/mean_terminated_length": 455.9560241699219, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.0192, "grad_norm": 0.009522214531898499, "learning_rate": 4.5e-06, "loss": 0.154, "num_tokens": 4729863.0, "reward": 0.6203250885009766, "reward_std": 0.6184340715408325, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.19318124651908875, "rewards/format_reward_step": 0.546875, "rewards/stepwise_brier_reward": 0.3506191074848175, "step": 18 }, { "calib/answer_extract_rate": 0.85546875, "calib/auroc": 0.5221840833968981, "calib/avg_num_step_conf": 4.39453125, "calib/ece": 0.6783364055299539, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.76953125, "calib/frac_conf_gt_0.9": 0.6866359447004609, "calib/gap": 0.015556318332061947, "calib/mean_conf": 0.8816543778801844, "calib/mu_c": 0.8939130434782608, "calib/mu_w": 0.8783567251461989, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.6740046082949308, "calib/std_conf": 0.18007124906241606, "calib/step_conf_rate": 0.91015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 323.453125, "completions/mean_terminated_length": 327.2885437011719, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.020266666666666665, "grad_norm": 0.01093797292560339, "learning_rate": 4.75e-06, "loss": 0.0717, "num_tokens": 4917427.0, "reward": 0.8620531558990479, "reward_std": 0.6407575607299805, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.27811408042907715, "rewards/format_reward_step": 0.76953125, "rewards/stepwise_brier_reward": 0.5060364007949829, "step": 19 }, { "calib/answer_extract_rate": 0.86328125, "calib/auroc": 0.5285938654841094, "calib/avg_num_step_conf": 4.4765625, "calib/ece": 0.6103956521739131, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.717391304347826, "calib/gap": 0.011736511456023613, "calib/mean_conf": 0.8793434782608696, "calib/mu_c": 0.8877121212121212, "calib/mu_w": 0.8759756097560976, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.6013913043478262, "calib/std_conf": 0.19598049886590194, "calib/step_conf_rate": 0.92578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 270.5390625, "completions/mean_terminated_length": 272.6692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.021333333333333333, "grad_norm": 0.010510188527405262, "learning_rate": 5e-06, "loss": 0.0665, "num_tokens": 5091557.0, "reward": 1.0149328708648682, "reward_std": 0.7371776103973389, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.3442583680152893, "rewards/format_reward_step": 0.8203125, "rewards/stepwise_brier_reward": 0.5279731154441833, "step": 20 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4460788542834958, "calib/avg_num_step_conf": 4.90625, "calib/ece": 0.6471679166666667, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.7791666666666667, "calib/gap": -0.024085937365197263, "calib/mean_conf": 0.9161679166666669, "calib/mu_c": 0.8988059701492537, "calib/mu_w": 0.9228919075144509, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6420845833333334, "calib/std_conf": 0.1259816409071565, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 283.921875, "completions/mean_terminated_length": 285.0353088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0224, "grad_norm": 0.008625438436865807, "learning_rate": 4.9722222222222224e-06, "loss": 0.0561, "num_tokens": 5267201.0, "reward": 1.100550889968872, "reward_std": 0.6891491413116455, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.343247652053833, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.5980186462402344, "step": 21 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.49097638057218895, "calib/avg_num_step_conf": 4.65234375, "calib/ece": 0.603175732217573, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.698744769874477, "calib/gap": 0.020667664670658614, "calib/mean_conf": 0.8950585774058577, "calib/mu_c": 0.9095, "calib/mu_w": 0.8888323353293414, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.5984895397489538, "calib/std_conf": 0.1637264171260149, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1551.0, "completions/max_terminated_length": 1551.0, "completions/mean_length": 259.1953125, "completions/mean_terminated_length": 259.1953125, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.023466666666666667, "grad_norm": 0.008928249590098858, "learning_rate": 4.944444444444445e-06, "loss": -0.0152, "num_tokens": 5435371.0, "reward": 1.1315412521362305, "reward_std": 0.7189508676528931, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.3857576251029968, "rewards/format_reward_step": 0.9140625, "rewards/stepwise_brier_reward": 0.6013451814651489, "step": 22 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4944663692518874, "calib/avg_num_step_conf": 4.55859375, "calib/ece": 0.64112, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.716, "calib/gap": 0.018452299245024117, "calib/mean_conf": 0.87064, "calib/mu_c": 0.8845161290322581, "calib/mu_w": 0.866063829787234, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.63188, "calib/std_conf": 0.2155801252434927, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 262.15234375, "completions/mean_terminated_length": 262.15234375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.024533333333333334, "grad_norm": 0.008457643911242485, "learning_rate": 4.9166666666666665e-06, "loss": -0.0006, "num_tokens": 5606418.0, "reward": 1.0740993022918701, "reward_std": 0.6285284757614136, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.36159294843673706, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.6223044991493225, "step": 23 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4774484536082475, "calib/avg_num_step_conf": 4.9296875, "calib/ece": 0.6777235772357724, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.6951219512195121, "calib/gap": 0.02600713719270409, "calib/mean_conf": 0.8891056910569107, "calib/mu_c": 0.9096153846153846, "calib/mu_w": 0.8836082474226805, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.6777235772357724, "calib/std_conf": 0.16910336536862294, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 281.55859375, "completions/mean_terminated_length": 281.55859375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.0256, "grad_norm": 0.008437077514827251, "learning_rate": 4.888888888888889e-06, "loss": 0.0738, "num_tokens": 5783009.0, "reward": 1.0127873420715332, "reward_std": 0.5980093479156494, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.33446913957595825, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.6307430863380432, "step": 24 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5340866571877573, "calib/avg_num_step_conf": 5.234375, "calib/ece": 0.5391618852459015, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.6229508196721312, "calib/gap": 0.04392161191349242, "calib/mean_conf": 0.8685430327868853, "calib/mu_c": 0.8975240963855422, "calib/mu_w": 0.8536024844720498, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5337704918032785, "calib/std_conf": 0.19359585140432667, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 280.28515625, "completions/mean_terminated_length": 281.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.02666666666666667, "grad_norm": 0.007779384031891823, "learning_rate": 4.861111111111111e-06, "loss": -0.0124, "num_tokens": 5957986.0, "reward": 1.2369064092636108, "reward_std": 0.6545590162277222, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.44903552532196045, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6470277309417725, "step": 25 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49043005671077505, "calib/avg_num_step_conf": 5.14453125, "calib/ece": 0.6145981554677207, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6442687747035574, "calib/gap": -0.015380434782608643, "calib/mean_conf": 0.8832147562582344, "calib/mu_c": 0.8720289855072464, "calib/mu_w": 0.887409420289855, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6125428194993412, "calib/std_conf": 0.1627337088137336, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 716.0, "completions/max_terminated_length": 716.0, "completions/mean_length": 275.27734375, "completions/mean_terminated_length": 276.35687255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.027733333333333332, "grad_norm": 0.008365216664969921, "learning_rate": 4.833333333333333e-06, "loss": -0.0109, "num_tokens": 6133697.0, "reward": 1.1731226444244385, "reward_std": 0.5622432231903076, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.3909871578216553, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7155659198760986, "step": 26 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4591040589111949, "calib/avg_num_step_conf": 5.33203125, "calib/ece": 0.6002741935483872, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.5282258064516129, "calib/gap": -0.0024202682563337907, "calib/mean_conf": 0.8306774193548387, "calib/mu_c": 0.8288524590163934, "calib/mu_w": 0.8312727272727272, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.592491935483871, "calib/std_conf": 0.21991851674255056, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 327.75, "completions/mean_terminated_length": 327.75, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.0288, "grad_norm": 0.007940828800201416, "learning_rate": 4.805555555555556e-06, "loss": 0.071, "num_tokens": 6322817.0, "reward": 1.118673324584961, "reward_std": 0.5638723373413086, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.4043359160423279, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7266073822975159, "step": 27 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5241018171729781, "calib/avg_num_step_conf": 5.2890625, "calib/ece": 0.44848605577689243, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.4063745019920319, "calib/gap": 0.028539325842696583, "calib/mean_conf": 0.7801195219123506, "calib/mu_c": 0.7985393258426966, "calib/mu_w": 0.77, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4370119521912351, "calib/std_conf": 0.22883645070809805, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1562.0, "completions/max_terminated_length": 1562.0, "completions/mean_length": 319.4375, "completions/mean_terminated_length": 319.4375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.029866666666666666, "grad_norm": 0.0069170789793133736, "learning_rate": 4.777777777777778e-06, "loss": 0.0388, "num_tokens": 6511537.0, "reward": 1.3392189741134644, "reward_std": 0.617000937461853, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.5301804542541504, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7876328229904175, "step": 28 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.460865561694291, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.46424291497975717, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.32793522267206476, "calib/gap": -0.0018699146157710178, "calib/mean_conf": 0.7272793522267207, "calib/mu_c": 0.725909090909091, "calib/mu_w": 0.727779005524862, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4621578947368422, "calib/std_conf": 0.2607661223114103, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2038.0, "completions/max_terminated_length": 2038.0, "completions/mean_length": 396.26171875, "completions/mean_terminated_length": 397.8157043457031, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.030933333333333334, "grad_norm": 0.005927573889493942, "learning_rate": 4.75e-06, "loss": 0.0884, "num_tokens": 6720108.0, "reward": 1.1746200323104858, "reward_std": 0.6304316520690918, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.4880704879760742, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.7807222604751587, "step": 29 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5158426966292136, "calib/avg_num_step_conf": 6.0234375, "calib/ece": 0.35924901185770747, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.22529644268774704, "calib/gap": 0.020675655430711593, "calib/mean_conf": 0.6333201581027668, "calib/mu_c": 0.6478666666666667, "calib/mu_w": 0.6271910112359551, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3480632411067193, "calib/std_conf": 0.2838165294692594, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1814.0, "completions/max_terminated_length": 1814.0, "completions/mean_length": 370.265625, "completions/mean_terminated_length": 370.265625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.032, "grad_norm": 0.005835330579429865, "learning_rate": 4.722222222222222e-06, "loss": 0.0146, "num_tokens": 6921880.0, "reward": 1.2952570915222168, "reward_std": 0.52005535364151, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.5941238403320312, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8525295257568359, "step": 30 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47856310356310355, "calib/avg_num_step_conf": 6.13671875, "calib/ece": 0.36245967741935486, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.13709677419354838, "calib/gap": -0.019958374958375047, "calib/mean_conf": 0.5747983870967742, "calib/mu_c": 0.5601515151515152, "calib/mu_w": 0.5801098901098902, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33556451612903226, "calib/std_conf": 0.277652819874921, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2377.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 390.66796875, "completions/mean_terminated_length": 390.66796875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.03306666666666667, "grad_norm": 0.005797600839287043, "learning_rate": 4.694444444444445e-06, "loss": 0.0471, "num_tokens": 7127803.0, "reward": 1.2438130378723145, "reward_std": 0.5065557956695557, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.6011230945587158, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8506916165351868, "step": 31 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5199475623582767, "calib/avg_num_step_conf": 5.71875, "calib/ece": 0.23920634920634917, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": 0.018214285714285627, "calib/mean_conf": 0.4847619047619047, "calib/mu_c": 0.4969047619047619, "calib/mu_w": 0.4786904761904763, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19531746031746028, "calib/std_conf": 0.2714210525274376, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2415.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 369.02734375, "completions/mean_terminated_length": 369.02734375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.034133333333333335, "grad_norm": 0.006477469112724066, "learning_rate": 4.666666666666667e-06, "loss": 0.0744, "num_tokens": 7328978.0, "reward": 1.3680706024169922, "reward_std": 0.4866165518760681, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6714035272598267, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8790041208267212, "step": 32 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5659700452803901, "calib/avg_num_step_conf": 5.7890625, "calib/ece": 0.1629746031746032, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.052554231974921484, "calib/mean_conf": 0.42996190476190477, "calib/mu_c": 0.46437241379310334, "calib/mu_w": 0.41181818181818186, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12384920634920635, "calib/std_conf": 0.24329715806022356, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1542.0, "completions/max_terminated_length": 1542.0, "completions/mean_length": 387.45703125, "completions/mean_terminated_length": 387.45703125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.0352, "grad_norm": 0.005612695589661598, "learning_rate": 4.638888888888889e-06, "loss": 0.0391, "num_tokens": 7535039.0, "reward": 1.3998472690582275, "reward_std": 0.5582293272018433, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.716796875, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8825923204421997, "step": 33 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4201649175412294, "calib/avg_num_step_conf": 5.58984375, "calib/ece": 0.2755590551181103, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": -0.06942753623188402, "calib/mean_conf": 0.37822047244094487, "calib/mu_c": 0.34049999999999997, "calib/mu_w": 0.409927536231884, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09854330708661418, "calib/std_conf": 0.23812968455758163, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 342.25, "completions/mean_terminated_length": 343.5921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.03626666666666667, "grad_norm": 0.006945520173758268, "learning_rate": 4.611111111111112e-06, "loss": 0.005, "num_tokens": 7727767.0, "reward": 1.5494495630264282, "reward_std": 0.5377414226531982, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6412307024002075, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8846927285194397, "step": 34 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4798036465638149, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.2348582995951417, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.032388663967611336, "calib/gap": -0.018398316970547002, "calib/mean_conf": 0.398502024291498, "calib/mu_c": 0.3869565217391305, "calib/mu_w": 0.4053548387096775, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.13044534412955466, "calib/std_conf": 0.23782708777620928, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 460.26171875, "completions/mean_terminated_length": 460.26171875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.037333333333333336, "grad_norm": 0.005454621743410826, "learning_rate": 4.583333333333333e-06, "loss": 0.1171, "num_tokens": 7954850.0, "reward": 1.403791904449463, "reward_std": 0.6024346947669983, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6721000075340271, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8493179082870483, "step": 35 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5275575447570333, "calib/avg_num_step_conf": 5.6640625, "calib/ece": 0.2941195219123506, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.02005012787723781, "calib/mean_conf": 0.28787250996015934, "calib/mu_c": 0.29705882352941176, "calib/mu_w": 0.27700869565217395, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020079681274900396, "calib/std_conf": 0.2075100911511454, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1636.0, "completions/max_terminated_length": 1636.0, "completions/mean_length": 356.33984375, "completions/mean_terminated_length": 357.7372741699219, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.0384, "grad_norm": 0.0070097423158586025, "learning_rate": 4.555555555555556e-06, "loss": 0.0373, "num_tokens": 8148785.0, "reward": 1.6728756427764893, "reward_std": 0.44604384899139404, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6388723254203796, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.888568103313446, "step": 36 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4613553113553114, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.22573651452282156, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.008298755186721992, "calib/gap": -0.027555128205128204, "calib/mean_conf": 0.25061203319502073, "calib/mu_c": 0.23346153846153847, "calib/mu_w": 0.2610166666666667, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04937759336099585, "calib/std_conf": 0.1813629127488925, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 440.265625, "completions/mean_terminated_length": 441.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.039466666666666664, "grad_norm": 0.005869188345968723, "learning_rate": 4.527777777777778e-06, "loss": 0.1705, "num_tokens": 8368589.0, "reward": 1.3879544734954834, "reward_std": 0.45529523491859436, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6618225574493408, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.8509328961372375, "step": 37 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48789722785665984, "calib/avg_num_step_conf": 5.08203125, "calib/ece": 0.20344129554655874, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.001409060175794452, "calib/mean_conf": 0.27113360323886637, "calib/mu_c": 0.2719607843137255, "calib/mu_w": 0.27055172413793105, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.030809716599190285, "calib/std_conf": 0.18329289619049413, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2715.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 392.77734375, "completions/mean_terminated_length": 394.3176574707031, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.04053333333333333, "grad_norm": 0.0068012080155313015, "learning_rate": 4.5e-06, "loss": 0.1799, "num_tokens": 8576028.0, "reward": 1.4690444469451904, "reward_std": 0.450836718082428, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6724566221237183, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8755960464477539, "step": 38 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.511598357985274, "calib/avg_num_step_conf": 4.78515625, "calib/ece": 0.23355436507936506, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.0040859646836515495, "calib/mean_conf": 0.23882658730158732, "calib/mu_c": 0.2364106796116505, "calib/mu_w": 0.24049664429530204, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.031825396825396826, "calib/std_conf": 0.165378487399972, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2279.0, "completions/max_terminated_length": 2279.0, "completions/mean_length": 358.375, "completions/mean_terminated_length": 358.375, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.0416, "grad_norm": 0.007424731273204088, "learning_rate": 4.472222222222223e-06, "loss": 0.1441, "num_tokens": 8773860.0, "reward": 1.4880990982055664, "reward_std": 0.42358171939849854, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6842081546783447, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9010008573532104, "step": 39 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5260904739432346, "calib/avg_num_step_conf": 4.2890625, "calib/ece": 0.15598425196850393, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006416773410638482, "calib/mean_conf": 0.24346456692913387, "calib/mu_c": 0.24758241758241764, "calib/mu_w": 0.24116564417177916, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.020590551181102368, "calib/std_conf": 0.1314478906528968, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2257.0, "completions/max_terminated_length": 2257.0, "completions/mean_length": 358.2890625, "completions/mean_terminated_length": 358.2890625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.042666666666666665, "grad_norm": 0.007885615341365337, "learning_rate": 4.444444444444444e-06, "loss": 0.054, "num_tokens": 8972342.0, "reward": 1.439058780670166, "reward_std": 0.41545605659484863, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7350699305534363, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9117902517318726, "step": 40 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48192687815329327, "calib/avg_num_step_conf": 3.69921875, "calib/ece": 0.42384, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.017163591125855332, "calib/mean_conf": 0.23304000000000002, "calib/mu_c": 0.22679245283018865, "calib/mu_w": 0.24395604395604398, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.01044, "calib/std_conf": 0.13034246583519893, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2443.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 322.546875, "completions/mean_terminated_length": 322.546875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.04373333333333333, "grad_norm": 0.008142044767737389, "learning_rate": 4.416666666666667e-06, "loss": 0.0904, "num_tokens": 9162162.0, "reward": 1.7881989479064941, "reward_std": 0.5068119168281555, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.5658527612686157, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9150683879852295, "step": 41 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.533406318423522, "calib/avg_num_step_conf": 3.50390625, "calib/ece": 0.2625223097112861, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.007689750808049234, "calib/mean_conf": 0.2148005249343832, "calib/mu_c": 0.21900869565217396, "calib/mu_w": 0.21131894484412472, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.012283464566929131, "calib/std_conf": 0.13146999005453217, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2084.0, "completions/max_terminated_length": 2084.0, "completions/mean_length": 275.0078125, "completions/mean_terminated_length": 275.0078125, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.0448, "grad_norm": 0.009993243031203747, "learning_rate": 4.388888888888889e-06, "loss": 0.0874, "num_tokens": 9336932.0, "reward": 1.5709275007247925, "reward_std": 0.37025392055511475, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6732515692710876, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.938583493232727, "step": 42 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5023479980227384, "calib/avg_num_step_conf": 3.33984375, "calib/ece": 0.23219607843137252, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003823529411764698, "calib/mean_conf": 0.24678431372549017, "calib/mu_c": 0.2488235294117647, "calib/mu_w": 0.245, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006156862745098041, "calib/std_conf": 0.12070646842090392, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2040.0, "completions/max_terminated_length": 2040.0, "completions/mean_length": 305.984375, "completions/mean_terminated_length": 305.984375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.04586666666666667, "grad_norm": 0.00938818883150816, "learning_rate": 4.361111111111112e-06, "loss": -0.006, "num_tokens": 9520488.0, "reward": 1.5983197689056396, "reward_std": 0.46041882038116455, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6835503578186035, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9362914562225342, "step": 43 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5968408011563081, "calib/avg_num_step_conf": 2.80859375, "calib/ece": 0.09551181102362206, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0395691375868952, "calib/mean_conf": 0.2628346456692914, "calib/mu_c": 0.2888505747126437, "calib/mu_w": 0.2492814371257485, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.007913385826771657, "calib/std_conf": 0.1133866469786607, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1992.0, "completions/max_terminated_length": 1992.0, "completions/mean_length": 298.0625, "completions/mean_terminated_length": 298.0625, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.046933333333333334, "grad_norm": 0.009420789778232574, "learning_rate": 4.333333333333334e-06, "loss": 0.0713, "num_tokens": 9703112.0, "reward": 1.4322527647018433, "reward_std": 0.35099905729293823, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.7673734426498413, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9381999969482422, "step": 44 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5263662511984659, "calib/avg_num_step_conf": 2.49609375, "calib/ece": 0.17224409448818898, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.0072860338766378985, "calib/mean_conf": 0.27153543307086614, "calib/mu_c": 0.27580952380952384, "calib/mu_w": 0.26852348993288594, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.015196850393700778, "calib/std_conf": 0.1375016274936232, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 291.578125, "completions/mean_terminated_length": 292.7215881347656, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.048, "grad_norm": 0.010626120492815971, "learning_rate": 4.305555555555556e-06, "loss": 0.0287, "num_tokens": 9882804.0, "reward": 1.5220348834991455, "reward_std": 0.42932039499282837, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7141109704971313, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9365284442901611, "step": 45 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4949874686716792, "calib/avg_num_step_conf": 2.73046875, "calib/ece": 0.21122529644268775, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005614661654135322, "calib/mean_conf": 0.273201581027668, "calib/mu_c": 0.27025, "calib/mu_w": 0.2758646616541353, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.005059288537549407, "calib/std_conf": 0.1139222867938069, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2797.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 320.91796875, "completions/mean_terminated_length": 320.91796875, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.04906666666666667, "grad_norm": 0.010083785280585289, "learning_rate": 4.277777777777778e-06, "loss": 0.0758, "num_tokens": 10069727.0, "reward": 1.592787265777588, "reward_std": 0.49180591106414795, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6757726669311523, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9297513961791992, "step": 46 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6044896640826873, "calib/avg_num_step_conf": 2.13671875, "calib/ece": 0.24012048192771088, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.028325581395348853, "calib/mean_conf": 0.2546586345381526, "calib/mu_c": 0.2693333333333333, "calib/mu_w": 0.24100775193798446, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.00642570281124498, "calib/std_conf": 0.1037605364841029, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 315.53125, "completions/mean_terminated_length": 316.7686462402344, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.050133333333333335, "grad_norm": 0.010103489272296429, "learning_rate": 4.25e-06, "loss": 0.0558, "num_tokens": 10256479.0, "reward": 1.5880870819091797, "reward_std": 0.44787827134132385, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6811476945877075, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.9212011098861694, "step": 47 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5208504238446814, "calib/avg_num_step_conf": 1.8515625, "calib/ece": 0.14577689243027883, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006519688269073093, "calib/mean_conf": 0.24880478087649405, "calib/mu_c": 0.2529347826086957, "calib/mu_w": 0.24641509433962258, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.014023904382470117, "calib/std_conf": 0.09218553598516582, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1606.0, "completions/max_terminated_length": 1606.0, "completions/mean_length": 260.640625, "completions/mean_terminated_length": 262.6929016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.0512, "grad_norm": 0.012790133245289326, "learning_rate": 4.222222222222223e-06, "loss": 0.05, "num_tokens": 10426891.0, "reward": 1.4405946731567383, "reward_std": 0.4182698130607605, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.7264277338981628, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9343888759613037, "step": 48 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4991228070175438, "calib/avg_num_step_conf": 2.27734375, "calib/ece": 0.20771653543307084, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014010025062656806, "calib/mean_conf": 0.2632283464566929, "calib/mu_c": 0.26245614035087717, "calib/mu_w": 0.26385714285714285, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.011062992125984257, "calib/std_conf": 0.09765665299222448, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2046.0, "completions/max_terminated_length": 2046.0, "completions/mean_length": 268.12109375, "completions/mean_terminated_length": 270.2322692871094, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.05226666666666667, "grad_norm": 0.01150052435696125, "learning_rate": 4.194444444444445e-06, "loss": 0.044, "num_tokens": 10600066.0, "reward": 1.5686070919036865, "reward_std": 0.315078467130661, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6947246193885803, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9390791654586792, "step": 49 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5348462301587302, "calib/avg_num_step_conf": 1.9140625, "calib/ece": 0.2641732283464567, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008423859126984151, "calib/mean_conf": 0.2451181102362205, "calib/mu_c": 0.24929687500000003, "calib/mu_w": 0.24087301587301588, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.002677165354330709, "calib/std_conf": 0.09437624602489562, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 290.515625, "completions/mean_terminated_length": 291.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.05333333333333334, "grad_norm": 0.012038069777190685, "learning_rate": 4.166666666666667e-06, "loss": -0.0097, "num_tokens": 10779798.0, "reward": 1.6445038318634033, "reward_std": 0.4098130464553833, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6655043363571167, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9437607526779175, "step": 50 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6397450532724506, "calib/avg_num_step_conf": 2.0546875, "calib/ece": 0.18196850393700786, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.048555301877219625, "calib/mean_conf": 0.24496062992125983, "calib/mu_c": 0.2728703703703703, "calib/mu_w": 0.2243150684931507, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0008661417322834646, "calib/std_conf": 0.09215165535059676, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2313.0, "completions/max_terminated_length": 2313.0, "completions/mean_length": 284.765625, "completions/mean_terminated_length": 284.765625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.0544, "grad_norm": 0.012309834361076355, "learning_rate": 4.138888888888889e-06, "loss": 0.0642, "num_tokens": 10961994.0, "reward": 1.550361156463623, "reward_std": 0.3914136290550232, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.7325843572616577, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.953235387802124, "step": 51 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6822704081632652, "calib/avg_num_step_conf": 2.05859375, "calib/ece": 0.31448412698412687, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05291071428571423, "calib/mean_conf": 0.24805555555555558, "calib/mu_c": 0.2715714285714285, "calib/mu_w": 0.2186607142857143, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.003492063492063492, "calib/std_conf": 0.09835042672703444, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2819.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 284.51171875, "completions/mean_terminated_length": 285.6274719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.055466666666666664, "grad_norm": 0.012607217766344547, "learning_rate": 4.111111111111111e-06, "loss": 0.0793, "num_tokens": 11142781.0, "reward": 1.711374044418335, "reward_std": 0.4108741581439972, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6605429649353027, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9427658319473267, "step": 52 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6280425640372117, "calib/avg_num_step_conf": 2.05859375, "calib/ece": 0.28673306772908363, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03888619854721545, "calib/mean_conf": 0.24314741035856574, "calib/mu_c": 0.2614285714285714, "calib/mu_w": 0.22254237288135595, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.09202921642913564, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 310.7265625, "completions/mean_terminated_length": 311.94512939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.05653333333333333, "grad_norm": 0.013702677562832832, "learning_rate": 4.083333333333334e-06, "loss": 0.0836, "num_tokens": 11328151.0, "reward": 1.6691620349884033, "reward_std": 0.46202585101127625, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6626269221305847, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9437087774276733, "step": 53 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6580402494695559, "calib/avg_num_step_conf": 2.38671875, "calib/ece": 0.3486614173228347, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04364174114318775, "calib/mean_conf": 0.25448818897637804, "calib/mu_c": 0.2721854304635762, "calib/mu_w": 0.22854368932038843, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004330708661417323, "calib/std_conf": 0.08621259806561991, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1577.0, "completions/max_terminated_length": 1577.0, "completions/mean_length": 302.8125, "completions/mean_terminated_length": 302.8125, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.0576, "grad_norm": 0.01350472867488861, "learning_rate": 4.055555555555556e-06, "loss": 0.0647, "num_tokens": 11511903.0, "reward": 1.7834925651550293, "reward_std": 0.2862309515476227, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6496000289916992, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9453080892562866, "step": 54 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6802901292967518, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.15003906249999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.044869757174392955, "calib/mean_conf": 0.26886718750000005, "calib/mu_c": 0.29533333333333334, "calib/mu_w": 0.2504635761589404, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.004375, "calib/std_conf": 0.09419418698008834, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 906.0, "completions/max_terminated_length": 906.0, "completions/mean_length": 272.95703125, "completions/mean_terminated_length": 274.0274658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.058666666666666666, "grad_norm": 0.014819178730249405, "learning_rate": 4.027777777777779e-06, "loss": 0.0191, "num_tokens": 11689604.0, "reward": 1.5323195457458496, "reward_std": 0.35186684131622314, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7427335977554321, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.949044942855835, "step": 55 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6308443604284815, "calib/avg_num_step_conf": 2.00390625, "calib/ece": 0.1781818181818182, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.043971014492753646, "calib/mean_conf": 0.28679841897233205, "calib/mu_c": 0.3107826086956522, "calib/mu_w": 0.26681159420289857, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005217391304347826, "calib/std_conf": 0.09575385768898549, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2281.0, "completions/max_terminated_length": 2281.0, "completions/mean_length": 305.90625, "completions/mean_terminated_length": 305.90625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.05973333333333333, "grad_norm": 0.013396499678492546, "learning_rate": 4.000000000000001e-06, "loss": 0.04, "num_tokens": 11874756.0, "reward": 1.5809111595153809, "reward_std": 0.42818328738212585, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.7240632772445679, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9355190396308899, "step": 56 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.680624286258089, "calib/avg_num_step_conf": 1.69921875, "calib/ece": 0.251699604743083, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05620733409465811, "calib/mean_conf": 0.3095652173913044, "calib/mu_c": 0.3342253521126761, "calib/mu_w": 0.278018018018018, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.08552678368165743, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2786.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 284.23828125, "completions/mean_terminated_length": 284.23828125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.0608, "grad_norm": 0.01563195139169693, "learning_rate": 3.972222222222223e-06, "loss": 0.0421, "num_tokens": 12054313.0, "reward": 1.7328195571899414, "reward_std": 0.4466170072555542, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7000335454940796, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9343697428703308, "step": 57 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5945839874411304, "calib/avg_num_step_conf": 1.86328125, "calib/ece": 0.08501992031872514, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02580913134484558, "calib/mean_conf": 0.3405577689243028, "calib/mu_c": 0.35567307692307687, "calib/mu_w": 0.3298639455782313, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.005617529880478088, "calib/std_conf": 0.09560297033906312, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1508.0, "completions/max_terminated_length": 1508.0, "completions/mean_length": 308.59765625, "completions/mean_terminated_length": 312.2569274902344, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.06186666666666667, "grad_norm": 0.011491983197629452, "learning_rate": 3.944444444444445e-06, "loss": 0.037, "num_tokens": 12239634.0, "reward": 1.5119364261627197, "reward_std": 0.47350311279296875, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.7385351657867432, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9185852408409119, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5770900520962541, "calib/avg_num_step_conf": 1.6484375, "calib/ece": 0.08125490196078433, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.022528528901017142, "calib/mean_conf": 0.3803921568627451, "calib/mu_c": 0.39267241379310347, "calib/mu_w": 0.37014388489208633, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0033725490196078456, "calib/std_conf": 0.08218966072835476, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 950.0, "completions/max_terminated_length": 950.0, "completions/mean_length": 285.0, "completions/mean_terminated_length": 286.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.06293333333333333, "grad_norm": 0.013964063487946987, "learning_rate": 3.916666666666667e-06, "loss": 0.0, "num_tokens": 12418842.0, "reward": 1.5842854976654053, "reward_std": 0.39020073413848877, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.7388551235198975, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9107863903045654, "step": 59 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6187847027263085, "calib/avg_num_step_conf": 1.96484375, "calib/ece": 0.09157480314960624, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03150040551500405, "calib/mean_conf": 0.3807874015748031, "calib/mu_c": 0.39777777777777784, "calib/mu_w": 0.3662773722627738, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.005866141732283469, "calib/std_conf": 0.08282993010738718, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 322.37890625, "completions/mean_terminated_length": 323.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.064, "grad_norm": 0.013174435123801231, "learning_rate": 3.88888888888889e-06, "loss": 0.0401, "num_tokens": 12610227.0, "reward": 1.5859081745147705, "reward_std": 0.4918421506881714, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7384917736053467, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9098280668258667, "step": 60 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6021793852676206, "calib/avg_num_step_conf": 2.19140625, "calib/ece": 0.17679999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020727344992050867, "calib/mean_conf": 0.4208, "calib/mu_c": 0.4292567567567568, "calib/mu_w": 0.4085294117647059, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0028, "calib/std_conf": 0.07755875192394474, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 296.55859375, "completions/mean_terminated_length": 298.8937072753906, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.06506666666666666, "grad_norm": 0.014439865946769714, "learning_rate": 3.861111111111112e-06, "loss": -0.0231, "num_tokens": 12790210.0, "reward": 1.7645703554153442, "reward_std": 0.41605138778686523, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7159687876701355, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8969999551773071, "step": 61 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5541274201988488, "calib/avg_num_step_conf": 2.1953125, "calib/ece": 0.05904382470119514, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010890894819466312, "calib/mean_conf": 0.42573705179282867, "calib/mu_c": 0.4321153846153847, "calib/mu_w": 0.42122448979591837, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03521912350597604, "calib/std_conf": 0.0894146223105681, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 363.63671875, "completions/mean_terminated_length": 365.0627746582031, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.06613333333333334, "grad_norm": 0.012277968227863312, "learning_rate": 3.833333333333334e-06, "loss": 0.0016, "num_tokens": 12990381.0, "reward": 1.510021686553955, "reward_std": 0.48530229926109314, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.7397617101669312, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.901887834072113, "step": 62 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5801273634453782, "calib/avg_num_step_conf": 2.19140625, "calib/ece": 0.0829554655870445, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029581144957983252, "calib/mean_conf": 0.43315789473684213, "calib/mu_c": 0.4484873949579833, "calib/mu_w": 0.41890625000000004, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.017165991902834014, "calib/std_conf": 0.08432962226115784, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 399.46875, "completions/mean_terminated_length": 401.0353088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.0672, "grad_norm": 0.010792006738483906, "learning_rate": 3.8055555555555556e-06, "loss": 0.0388, "num_tokens": 13201285.0, "reward": 1.5720921754837036, "reward_std": 0.5513401031494141, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7202058434486389, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8728504180908203, "step": 63 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5626911314984709, "calib/avg_num_step_conf": 2.18359375, "calib/ece": 0.09184426229508194, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.033306829765545276, "calib/mean_conf": 0.46356557377049185, "calib/mu_c": 0.47844444444444445, "calib/mu_w": 0.4451376146788992, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0010655737704918034, "calib/std_conf": 0.09594222029173002, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 380.2890625, "completions/mean_terminated_length": 381.7804260253906, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.06826666666666667, "grad_norm": 0.012527639046311378, "learning_rate": 3.777777777777778e-06, "loss": 0.0231, "num_tokens": 13402415.0, "reward": 1.65690279006958, "reward_std": 0.5391913652420044, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.707330048084259, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.8499686121940613, "step": 64 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5567651142244099, "calib/avg_num_step_conf": 2.4453125, "calib/ece": 0.08656126482213439, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013242458664647128, "calib/mean_conf": 0.47604743083003953, "calib/mu_c": 0.48201438848920863, "calib/mu_w": 0.4687719298245615, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.00660079051383399, "calib/std_conf": 0.08234333383939904, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1972.0, "completions/max_terminated_length": 1972.0, "completions/mean_length": 341.32421875, "completions/mean_terminated_length": 342.6627502441406, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.06933333333333333, "grad_norm": 0.013505609706044197, "learning_rate": 3.7500000000000005e-06, "loss": -0.012, "num_tokens": 13594818.0, "reward": 1.7095803022384644, "reward_std": 0.30704957246780396, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7325637340545654, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8870077133178711, "step": 65 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6078224101479915, "calib/avg_num_step_conf": 2.47265625, "calib/ece": 0.039399999999999956, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03412582484464094, "calib/mean_conf": 0.47892000000000007, "calib/mu_c": 0.49652892561983475, "calib/mu_w": 0.4624031007751938, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.017159999999999998, "calib/std_conf": 0.10243453323952818, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2699.0, "completions/max_terminated_length": 2699.0, "completions/mean_length": 424.5078125, "completions/mean_terminated_length": 427.85040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0704, "grad_norm": 0.012787997722625732, "learning_rate": 3.7222222222222225e-06, "loss": 0.0821, "num_tokens": 13809844.0, "reward": 1.5854953527450562, "reward_std": 0.48220348358154297, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7278234362602234, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8641577959060669, "step": 66 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6565740622199463, "calib/avg_num_step_conf": 2.828125, "calib/ece": 0.05438735177865616, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06275380873127645, "calib/mean_conf": 0.5335968379446641, "calib/mu_c": 0.5601369863013699, "calib/mu_w": 0.4973831775700934, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005454545454545452, "calib/std_conf": 0.11480223375604197, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 399.72265625, "completions/mean_terminated_length": 402.8700866699219, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.07146666666666666, "grad_norm": 0.011641331948339939, "learning_rate": 3.694444444444445e-06, "loss": -0.0325, "num_tokens": 14017181.0, "reward": 1.7519131898880005, "reward_std": 0.28649717569351196, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7556644678115845, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8691759705543518, "step": 67 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5697100362454692, "calib/avg_num_step_conf": 2.859375, "calib/ece": 0.1192094861660079, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027567804024496767, "calib/mean_conf": 0.5072727272727273, "calib/mu_c": 0.5211111111111111, "calib/mu_w": 0.4935433070866143, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06422924901185773, "calib/std_conf": 0.10645227886321025, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1862.0, "completions/max_terminated_length": 1862.0, "completions/mean_length": 415.78515625, "completions/mean_terminated_length": 419.0590515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.07253333333333334, "grad_norm": 0.01155012845993042, "learning_rate": 3.6666666666666666e-06, "loss": 0.0184, "num_tokens": 14227710.0, "reward": 1.6360442638397217, "reward_std": 0.3913213014602661, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7412973046302795, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8810046911239624, "step": 68 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5540638606676344, "calib/avg_num_step_conf": 3.01953125, "calib/ece": 0.08558232931726908, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": 0.010903813167964183, "calib/mean_conf": 0.511285140562249, "calib/mu_c": 0.5175471698113208, "calib/mu_w": 0.5066433566433566, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.08558232931726908, "calib/std_conf": 0.1164637723119289, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2306.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 479.484375, "completions/mean_terminated_length": 485.16998291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.0736, "grad_norm": 0.010634348727762699, "learning_rate": 3.638888888888889e-06, "loss": 0.0674, "num_tokens": 14454954.0, "reward": 1.4911199808120728, "reward_std": 0.4545786380767822, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.706221878528595, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8598206043243408, "step": 69 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6284987277353689, "calib/avg_num_step_conf": 3.15625, "calib/ece": 0.029043824701195244, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05819147582697187, "calib/mean_conf": 0.5035458167330678, "calib/mu_c": 0.5339166666666666, "calib/mu_w": 0.4757251908396947, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.027250996015936294, "calib/std_conf": 0.11774346181943768, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 476.625, "completions/mean_terminated_length": 478.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.07466666666666667, "grad_norm": 0.010567680932581425, "learning_rate": 3.6111111111111115e-06, "loss": 0.0672, "num_tokens": 14683962.0, "reward": 1.5975029468536377, "reward_std": 0.410000205039978, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7483577728271484, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8760287165641785, "step": 70 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6081660986999874, "calib/avg_num_step_conf": 3.4921875, "calib/ece": 0.10715415019762846, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03986684336741142, "calib/mean_conf": 0.5569565217391306, "calib/mu_c": 0.5788596491228071, "calib/mu_w": 0.5389928057553957, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10675889328063241, "calib/std_conf": 0.10736433707912163, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2378.0, "completions/max_terminated_length": 2378.0, "completions/mean_length": 473.33984375, "completions/mean_terminated_length": 475.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.07573333333333333, "grad_norm": 0.0112195685505867, "learning_rate": 3.5833333333333335e-06, "loss": 0.021, "num_tokens": 14909545.0, "reward": 1.567267894744873, "reward_std": 0.528032660484314, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.7405582070350647, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8800756931304932, "step": 71 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6617248123146804, "calib/avg_num_step_conf": 3.53515625, "calib/ece": 0.05758730158730162, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": 0.06981692006813456, "calib/mean_conf": 0.5544920634920636, "calib/mu_c": 0.5880152671755725, "calib/mu_w": 0.518198347107438, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04611904761904761, "calib/std_conf": 0.14388952833887278, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2528.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 461.5859375, "completions/mean_terminated_length": 461.5859375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.0768, "grad_norm": 0.012084675952792168, "learning_rate": 3.555555555555556e-06, "loss": 0.0357, "num_tokens": 15132119.0, "reward": 1.665956974029541, "reward_std": 0.4454992413520813, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7514150738716125, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8733506798744202, "step": 72 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5393425605536333, "calib/avg_num_step_conf": 3.7578125, "calib/ece": 0.1385098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.01276470588235279, "calib/mean_conf": 0.5752156862745098, "calib/mu_c": 0.5794705882352942, "calib/mu_w": 0.5667058823529414, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02352941176470588, "calib/std_conf": 0.1267506914898553, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 454.59765625, "completions/mean_terminated_length": 454.59765625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.07786666666666667, "grad_norm": 0.010945611633360386, "learning_rate": 3.5277777777777784e-06, "loss": 0.0264, "num_tokens": 15355528.0, "reward": 1.8979418277740479, "reward_std": 0.4913485646247864, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7496898174285889, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8811398148536682, "step": 73 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5702736318407959, "calib/avg_num_step_conf": 3.68359375, "calib/ece": 0.061653225806451606, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.016129032258064516, "calib/gap": 0.04322204765645454, "calib/mean_conf": 0.5732661290322582, "calib/mu_c": 0.593134328358209, "calib/mu_w": 0.5499122807017545, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.047298387096774176, "calib/std_conf": 0.12196731508700083, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 460.96875, "completions/mean_terminated_length": 466.434814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.07893333333333333, "grad_norm": 0.0119440583512187, "learning_rate": 3.5e-06, "loss": 0.0263, "num_tokens": 15577464.0, "reward": 1.656386375427246, "reward_std": 0.3758211135864258, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7251109480857849, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8457469344139099, "step": 74 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5690184049079754, "calib/avg_num_step_conf": 4.08984375, "calib/ece": 0.05826086956521743, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.033805725971370104, "calib/mean_conf": 0.6194466403162056, "calib/mu_c": 0.6314723926380369, "calib/mu_w": 0.5976666666666668, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01671936758893279, "calib/std_conf": 0.12093055743654747, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 442.0859375, "completions/mean_terminated_length": 443.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.08, "grad_norm": 0.011777788400650024, "learning_rate": 3.4722222222222224e-06, "loss": -0.0068, "num_tokens": 15795390.0, "reward": 1.8508899211883545, "reward_std": 0.4994873106479645, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7589175701141357, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8555798530578613, "step": 75 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6155647382920111, "calib/avg_num_step_conf": 4.125, "calib/ece": 0.050395256916996076, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.04662878787878788, "calib/mean_conf": 0.6134782608695653, "calib/mu_c": 0.6296969696969698, "calib/mu_w": 0.5830681818181819, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0058498023715415045, "calib/std_conf": 0.1204264348357623, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 487.5625, "completions/mean_terminated_length": 489.47454833984375, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.08106666666666666, "grad_norm": 0.011100761592388153, "learning_rate": 3.444444444444445e-06, "loss": 0.0342, "num_tokens": 16023262.0, "reward": 1.8673896789550781, "reward_std": 0.3366937041282654, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7664664387702942, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8671547174453735, "step": 76 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5162422037422038, "calib/avg_num_step_conf": 4.3671875, "calib/ece": 0.12722222222222224, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": 0.010109147609147384, "calib/mean_conf": 0.6042063492063492, "calib/mu_c": 0.6083783783783783, "calib/mu_w": 0.5982692307692309, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07206349206349204, "calib/std_conf": 0.13086921360961365, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2711.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 515.22265625, "completions/mean_terminated_length": 521.33203125, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.08213333333333334, "grad_norm": 0.010618972592055798, "learning_rate": 3.416666666666667e-06, "loss": -0.0007, "num_tokens": 16259823.0, "reward": 1.756075143814087, "reward_std": 0.45902228355407715, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7307425737380981, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8638701438903809, "step": 77 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5577266922094508, "calib/avg_num_step_conf": 4.9140625, "calib/ece": 0.09317667984189726, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.011251621966794412, "calib/mean_conf": 0.6279300395256917, "calib/mu_c": 0.6327331034482759, "calib/mu_w": 0.6214814814814815, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0739920948616601, "calib/std_conf": 0.15163950974534543, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1562.0, "completions/max_terminated_length": 1562.0, "completions/mean_length": 539.32421875, "completions/mean_terminated_length": 541.4392700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.0832, "grad_norm": 0.010913941077888012, "learning_rate": 3.3888888888888893e-06, "loss": 0.0031, "num_tokens": 16505914.0, "reward": 1.7397754192352295, "reward_std": 0.5139069557189941, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7233127355575562, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8686016798019409, "step": 78 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6401579986833443, "calib/avg_num_step_conf": 4.875, "calib/ece": 0.09833992094861659, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07114624505928854, "calib/gap": 0.06974522712310727, "calib/mean_conf": 0.6607905138339921, "calib/mu_c": 0.6878064516129032, "calib/mu_w": 0.618061224489796, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0732411067193676, "calib/std_conf": 0.1553936973459181, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 532.37109375, "completions/mean_terminated_length": 534.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.08426666666666667, "grad_norm": 0.010380618274211884, "learning_rate": 3.3611111111111117e-06, "loss": 0.0087, "num_tokens": 16748577.0, "reward": 1.799513339996338, "reward_std": 0.4986400902271271, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7558000087738037, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8485037088394165, "step": 79 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6805725971370142, "calib/avg_num_step_conf": 5.2421875, "calib/ece": 0.07553359683794474, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.1067193675889328, "calib/gap": 0.08949488752556234, "calib/mean_conf": 0.70399209486166, "calib/mu_c": 0.7358282208588957, "calib/mu_w": 0.6463333333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06762845849802375, "calib/std_conf": 0.14632072607194188, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 486.0078125, "completions/mean_terminated_length": 487.91375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 247.0, "epoch": 0.08533333333333333, "grad_norm": 0.01169226597994566, "learning_rate": 3.3333333333333333e-06, "loss": -0.0085, "num_tokens": 16975155.0, "reward": 1.8564797639846802, "reward_std": 0.43828481435775757, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7751379013061523, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8382811546325684, "step": 80 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5273809523809525, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.1275708502024292, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.032388663967611336, "calib/gap": 0.005582312925169974, "calib/mean_conf": 0.6635222672064778, "calib/mu_c": 0.6657823129251701, "calib/mu_w": 0.6602000000000001, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09797570850202432, "calib/std_conf": 0.13415254516637715, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 538.74609375, "completions/mean_terminated_length": 545.1343994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.0864, "grad_norm": 0.011217792518436909, "learning_rate": 3.3055555555555558e-06, "loss": 0.0133, "num_tokens": 17219322.0, "reward": 1.7309539318084717, "reward_std": 0.40973007678985596, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7130863666534424, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8357298374176025, "step": 81 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5857142857142857, "calib/avg_num_step_conf": 5.0625, "calib/ece": 0.07434782608695656, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.03528442728442727, "calib/mean_conf": 0.6445454545454545, "calib/mu_c": 0.6591891891891892, "calib/mu_w": 0.623904761904762, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0669565217391304, "calib/std_conf": 0.1319431055862712, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2933.0, "completions/max_terminated_length": 2933.0, "completions/mean_length": 523.328125, "completions/mean_terminated_length": 523.328125, "completions/min_length": 253.0, "completions/min_terminated_length": 253.0, "epoch": 0.08746666666666666, "grad_norm": 0.010938949882984161, "learning_rate": 3.277777777777778e-06, "loss": 0.0451, "num_tokens": 17458846.0, "reward": 1.768110752105713, "reward_std": 0.41400328278541565, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7428593635559082, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8686458468437195, "step": 82 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.527170868347339, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.12930314960629918, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.020962029256146764, "calib/mean_conf": 0.6515866141732283, "calib/mu_c": 0.6614074074074073, "calib/mu_w": 0.6404453781512606, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.12469685039370072, "calib/std_conf": 0.14549304708457206, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 587.734375, "completions/mean_terminated_length": 590.0392456054688, "completions/min_length": 0.0, "completions/min_terminated_length": 274.0, "epoch": 0.08853333333333334, "grad_norm": 0.010783905163407326, "learning_rate": 3.2500000000000002e-06, "loss": 0.0689, "num_tokens": 17716570.0, "reward": 1.6735692024230957, "reward_std": 0.3873692750930786, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7130337357521057, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8562429547309875, "step": 83 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.622803666921314, "calib/avg_num_step_conf": 5.4921875, "calib/ece": 0.1501179282868526, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.07569721115537849, "calib/gap": 0.06809253883371513, "calib/mean_conf": 0.6459776892430279, "calib/mu_c": 0.678260606060606, "calib/mu_w": 0.6101680672268909, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.13509960159362547, "calib/std_conf": 0.16050626125057085, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1755.0, "completions/max_terminated_length": 1755.0, "completions/mean_length": 501.8515625, "completions/mean_terminated_length": 507.8023986816406, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.0896, "grad_norm": 0.012522642500698566, "learning_rate": 3.2222222222222227e-06, "loss": -0.0018, "num_tokens": 17950964.0, "reward": 1.6547077894210815, "reward_std": 0.45172953605651855, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7247183322906494, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8550503253936768, "step": 84 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6427714646464646, "calib/avg_num_step_conf": 5.515625, "calib/ece": 0.12132539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": 0.07116363636363643, "calib/mean_conf": 0.6216428571428572, "calib/mu_c": 0.655530303030303, "calib/mu_w": 0.5843666666666666, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10957936507936504, "calib/std_conf": 0.15777913482297817, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 558.80078125, "completions/mean_terminated_length": 563.2008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.09066666666666667, "grad_norm": 0.0107113691046834, "learning_rate": 3.1944444444444443e-06, "loss": 0.0019, "num_tokens": 18201841.0, "reward": 1.6682491302490234, "reward_std": 0.46407395601272583, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7398586273193359, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8706381320953369, "step": 85 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6746666666666665, "calib/avg_num_step_conf": 5.3671875, "calib/ece": 0.11095617529880475, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.08622095238095251, "calib/mean_conf": 0.6094422310756973, "calib/mu_c": 0.6523809523809525, "calib/mu_w": 0.56616, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.10920318725099598, "calib/std_conf": 0.1339578620841114, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2788.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 577.26953125, "completions/mean_terminated_length": 577.26953125, "completions/min_length": 241.0, "completions/min_terminated_length": 241.0, "epoch": 0.09173333333333333, "grad_norm": 0.010854917578399181, "learning_rate": 3.1666666666666667e-06, "loss": 0.0634, "num_tokens": 18455134.0, "reward": 1.6256598234176636, "reward_std": 0.42664429545402527, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7426445484161377, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8693699836730957, "step": 86 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.562534435261708, "calib/avg_num_step_conf": 6.09765625, "calib/ece": 0.12314049586776857, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.02066115702479339, "calib/gap": 0.026233766233766276, "calib/mean_conf": 0.6025619834710744, "calib/mu_c": 0.6109090909090908, "calib/mu_w": 0.5846753246753246, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.021942148760330567, "calib/std_conf": 0.1339392296245931, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 551.4140625, "completions/mean_terminated_length": 564.6480102539062, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.0928, "grad_norm": 0.011104823090136051, "learning_rate": 3.138888888888889e-06, "loss": -0.0038, "num_tokens": 18701792.0, "reward": 1.8306856155395508, "reward_std": 0.44113269448280334, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.726388692855835, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.8463533520698547, "step": 87 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5883070209215762, "calib/avg_num_step_conf": 6.16015625, "calib/ece": 0.13539525691699608, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03557312252964427, "calib/gap": 0.03860672570915158, "calib/mean_conf": 0.5782806324110672, "calib/mu_c": 0.5944557823129252, "calib/mu_w": 0.5558490566037736, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06632411067193678, "calib/std_conf": 0.1581033905867141, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2336.0, "completions/max_terminated_length": 2336.0, "completions/mean_length": 573.1015625, "completions/mean_terminated_length": 573.1015625, "completions/min_length": 303.0, "completions/min_terminated_length": 303.0, "epoch": 0.09386666666666667, "grad_norm": 0.010853197425603867, "learning_rate": 3.1111111111111116e-06, "loss": 0.0224, "num_tokens": 18958354.0, "reward": 1.760920763015747, "reward_std": 0.4879763424396515, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7380582094192505, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8915627002716064, "step": 88 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6754781788793104, "calib/avg_num_step_conf": 6.64453125, "calib/ece": 0.1219959016393443, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.04918032786885246, "calib/gap": 0.08492914870689638, "calib/mean_conf": 0.6147254098360656, "calib/mu_c": 0.6551015625, "calib/mu_w": 0.5701724137931036, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.10606557377049185, "calib/std_conf": 0.15300354386845894, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2446.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 618.3125, "completions/mean_terminated_length": 630.6295166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.09493333333333333, "grad_norm": 0.010586491785943508, "learning_rate": 3.0833333333333336e-06, "loss": -0.0082, "num_tokens": 19225530.0, "reward": 1.6018019914627075, "reward_std": 0.37154436111450195, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7094407677650452, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.8305795192718506, "step": 89 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5019526248399488, "calib/avg_num_step_conf": 7.43359375, "calib/ece": 0.14103174603174606, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06349206349206349, "calib/gap": -0.006066581306017893, "calib/mean_conf": 0.6491269841269841, "calib/mu_c": 0.6464788732394365, "calib/mu_w": 0.6525454545454544, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11333333333333337, "calib/std_conf": 0.14228029479773335, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2055.0, "completions/max_terminated_length": 2055.0, "completions/mean_length": 583.7578125, "completions/mean_terminated_length": 588.3543090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.096, "grad_norm": 0.00980350561439991, "learning_rate": 3.055555555555556e-06, "loss": 0.0095, "num_tokens": 19478292.0, "reward": 1.7174973487854004, "reward_std": 0.47956204414367676, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7093156576156616, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8716117143630981, "step": 90 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5488736793142402, "calib/avg_num_step_conf": 7.3046875, "calib/ece": 0.07418666666666668, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": 0.02385762066139474, "calib/mean_conf": 0.6261333333333333, "calib/mu_c": 0.6357718120805368, "calib/mu_w": 0.611914191419142, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05216, "calib/std_conf": 0.12623427954930996, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 626.83203125, "completions/mean_terminated_length": 631.7677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 273.0, "epoch": 0.09706666666666666, "grad_norm": 0.009960450232028961, "learning_rate": 3.0277777777777776e-06, "loss": 0.0244, "num_tokens": 19746473.0, "reward": 1.7600347995758057, "reward_std": 0.4663000702857971, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7324761152267456, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8701634407043457, "step": 91 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6787819889860706, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.0928571428571429, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": 0.08712925170068031, "calib/mean_conf": 0.6153968253968256, "calib/mu_c": 0.6517006802721088, "calib/mu_w": 0.5645714285714285, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.062460317460317466, "calib/std_conf": 0.15089495344128934, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1871.0, "completions/max_terminated_length": 1871.0, "completions/mean_length": 561.0859375, "completions/mean_terminated_length": 563.2863159179688, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.09813333333333334, "grad_norm": 0.010641069151461124, "learning_rate": 3e-06, "loss": 0.0441, "num_tokens": 19996831.0, "reward": 1.762449026107788, "reward_std": 0.4561864733695984, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7633843421936035, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8723491430282593, "step": 92 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5030359795878819, "calib/avg_num_step_conf": 7.390625, "calib/ece": 0.138484, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.072, "calib/gap": 0.0032170402428783884, "calib/mean_conf": 0.657524, "calib/mu_c": 0.658978102189781, "calib/mu_w": 0.6557610619469026, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.124004, "calib/std_conf": 0.14997290896691975, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1824.0, "completions/max_terminated_length": 1824.0, "completions/mean_length": 612.09375, "completions/mean_terminated_length": 619.351806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 254.0, "epoch": 0.0992, "grad_norm": 0.010396548546850681, "learning_rate": 2.9722222222222225e-06, "loss": 0.0022, "num_tokens": 20259303.0, "reward": 1.6816561222076416, "reward_std": 0.5101436376571655, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.702549159526825, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8600127696990967, "step": 93 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6179757343550447, "calib/avg_num_step_conf": 6.43359375, "calib/ece": 0.08928286852589642, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.05509259259259247, "calib/mean_conf": 0.6271314741035857, "calib/mu_c": 0.6525925925925925, "calib/mu_w": 0.5975, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08928286852589642, "calib/std_conf": 0.13229712215801656, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2211.0, "completions/max_terminated_length": 2211.0, "completions/mean_length": 548.9296875, "completions/mean_terminated_length": 553.251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 278.0, "epoch": 0.10026666666666667, "grad_norm": 0.012168372049927711, "learning_rate": 2.944444444444445e-06, "loss": 0.0315, "num_tokens": 20508509.0, "reward": 1.681666612625122, "reward_std": 0.43118709325790405, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7372269630432129, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8722525835037231, "step": 94 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6383513931888545, "calib/avg_num_step_conf": 7.109375, "calib/ece": 0.08560236220472447, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07480314960629922, "calib/gap": 0.08595614035087717, "calib/mean_conf": 0.6512716535433071, "calib/mu_c": 0.6857894736842106, "calib/mu_w": 0.5998333333333334, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06922440944881895, "calib/std_conf": 0.17228837590525065, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1856.0, "completions/max_terminated_length": 1856.0, "completions/mean_length": 599.09765625, "completions/mean_terminated_length": 601.4470825195312, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.10133333333333333, "grad_norm": 0.009556005708873272, "learning_rate": 2.916666666666667e-06, "loss": 0.0179, "num_tokens": 20768006.0, "reward": 1.7951675653457642, "reward_std": 0.4173787236213684, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7625200748443604, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8712753057479858, "step": 95 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6868736714242333, "calib/avg_num_step_conf": 7.08203125, "calib/ece": 0.07134920634920633, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.10139690252049816, "calib/mean_conf": 0.6749999999999999, "calib/mu_c": 0.7047752808988764, "calib/mu_w": 0.6033783783783783, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.019999999999999966, "calib/std_conf": 0.15763505169793146, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2668.0, "completions/max_terminated_length": 2668.0, "completions/mean_length": 561.0078125, "completions/mean_terminated_length": 563.2078857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.1024, "grad_norm": 0.01089701522141695, "learning_rate": 2.888888888888889e-06, "loss": 0.0017, "num_tokens": 21017440.0, "reward": 1.9514626264572144, "reward_std": 0.3948931396007538, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7932445406913757, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8563559055328369, "step": 96 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.626808347202663, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.09111857707509882, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08695652173913043, "calib/gap": 0.06799491102291633, "calib/mean_conf": 0.6514110671936758, "calib/mu_c": 0.6801678082191781, "calib/mu_w": 0.6121728971962618, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08272727272727275, "calib/std_conf": 0.16670608854078667, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2574.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 601.69140625, "completions/mean_terminated_length": 601.69140625, "completions/min_length": 242.0, "completions/min_terminated_length": 242.0, "epoch": 0.10346666666666667, "grad_norm": 0.009758230298757553, "learning_rate": 2.861111111111111e-06, "loss": 0.0657, "num_tokens": 21276545.0, "reward": 1.754123568534851, "reward_std": 0.42388081550598145, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7469562292098999, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8711007833480835, "step": 97 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.45511760966306425, "calib/avg_num_step_conf": 6.828125, "calib/ece": 0.16434782608695653, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.09486166007905138, "calib/gap": -0.023517482517482535, "calib/mean_conf": 0.7031620553359685, "calib/mu_c": 0.6929370629370629, "calib/mu_w": 0.7164545454545455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.15114624505928853, "calib/std_conf": 0.14104172504784265, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2613.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 591.82421875, "completions/mean_terminated_length": 594.1451416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 271.0, "epoch": 0.10453333333333334, "grad_norm": 0.0114395497366786, "learning_rate": 2.8333333333333335e-06, "loss": 0.0575, "num_tokens": 21534236.0, "reward": 1.7173734903335571, "reward_std": 0.49968981742858887, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6920980215072632, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8570834398269653, "step": 98 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5435785864978904, "calib/avg_num_step_conf": 7.51171875, "calib/ece": 0.3524015748031496, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.17716535433070865, "calib/gap": 0.028568037974683547, "calib/mean_conf": 0.7303543307086614, "calib/mu_c": 0.748125, "calib/mu_w": 0.7195569620253165, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3524015748031496, "calib/std_conf": 0.16391216332046432, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2838.0, "completions/max_terminated_length": 2838.0, "completions/mean_length": 648.99609375, "completions/mean_terminated_length": 651.5411987304688, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.1056, "grad_norm": 0.009856244549155235, "learning_rate": 2.805555555555556e-06, "loss": 0.0082, "num_tokens": 21806179.0, "reward": 1.422696828842163, "reward_std": 0.4133927822113037, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6222191452980042, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.849818229675293, "step": 99 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.619396551724138, "calib/avg_num_step_conf": 6.41015625, "calib/ece": 0.14656250000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0546875, "calib/gap": 0.05310344827586211, "calib/mean_conf": 0.6934375, "calib/mu_c": 0.7175000000000001, "calib/mu_w": 0.664396551724138, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14656250000000004, "calib/std_conf": 0.1285370660305812, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1506.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 560.96484375, "completions/mean_terminated_length": 563.1647338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 269.0, "epoch": 0.10666666666666667, "grad_norm": 0.010851933620870113, "learning_rate": 2.7777777777777783e-06, "loss": 0.0178, "num_tokens": 22057194.0, "reward": 1.725217580795288, "reward_std": 0.3864947557449341, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7405133247375488, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8791074752807617, "step": 100 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5550820707070706, "calib/avg_num_step_conf": 6.8125, "calib/ece": 0.23219246031746035, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.12301587301587301, "calib/gap": 0.024342424242424276, "calib/mean_conf": 0.6928075396825396, "calib/mu_c": 0.7055583333333334, "calib/mu_w": 0.6812159090909091, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22440476190476194, "calib/std_conf": 0.16549637237278048, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2709.0, "completions/max_terminated_length": 2709.0, "completions/mean_length": 591.1171875, "completions/mean_terminated_length": 595.7716674804688, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.10773333333333333, "grad_norm": 0.010762743651866913, "learning_rate": 2.7500000000000004e-06, "loss": -0.013, "num_tokens": 22315512.0, "reward": 1.5801358222961426, "reward_std": 0.4919942617416382, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.677642285823822, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8616510629653931, "step": 101 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7328753215107621, "calib/avg_num_step_conf": 6.734375, "calib/ece": 0.06649999999999999, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.10636895898199539, "calib/mean_conf": 0.6635980392156864, "calib/mu_c": 0.7007228915662651, "calib/mu_w": 0.5943539325842697, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.039558823529411806, "calib/std_conf": 0.13501931668656994, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 488.171875, "completions/mean_terminated_length": 490.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.1088, "grad_norm": 0.012388897128403187, "learning_rate": 2.7222222222222224e-06, "loss": -0.0081, "num_tokens": 22547180.0, "reward": 1.8868448734283447, "reward_std": 0.3592468798160553, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.796177327632904, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8762019872665405, "step": 102 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5109670987038883, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.1766141732283465, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09448818897637795, "calib/gap": -0.0017746759720835836, "calib/mean_conf": 0.6902362204724409, "calib/mu_c": 0.6894117647058825, "calib/mu_w": 0.6911864406779661, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16570866141732285, "calib/std_conf": 0.14031160242364688, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 581.57421875, "completions/mean_terminated_length": 583.8549194335938, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.10986666666666667, "grad_norm": 0.0105710718780756, "learning_rate": 2.6944444444444444e-06, "loss": 0.0269, "num_tokens": 22800615.0, "reward": 1.6926425695419312, "reward_std": 0.408366858959198, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.701200008392334, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8740577697753906, "step": 103 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5903319707667534, "calib/avg_num_step_conf": 6.10546875, "calib/ece": 0.19290196078431376, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.04008547008546992, "calib/mean_conf": 0.6517254901960784, "calib/mu_c": 0.6734188034188033, "calib/mu_w": 0.6333333333333334, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19290196078431376, "calib/std_conf": 0.12792216972484732, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 517.54296875, "completions/mean_terminated_length": 519.5725708007812, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.11093333333333333, "grad_norm": 0.011246602050960064, "learning_rate": 2.666666666666667e-06, "loss": 0.0073, "num_tokens": 23039786.0, "reward": 1.584836721420288, "reward_std": 0.4208795428276062, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7152222990989685, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8897498846054077, "step": 104 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.465679012345679, "calib/avg_num_step_conf": 6.61328125, "calib/ece": 0.17496470588235297, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09019607843137255, "calib/gap": -0.034275925925925876, "calib/mean_conf": 0.6586039215686276, "calib/mu_c": 0.6424740740740741, "calib/mu_w": 0.67675, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15207843137254906, "calib/std_conf": 0.15082412814825125, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2774.0, "completions/max_terminated_length": 2774.0, "completions/mean_length": 569.0390625, "completions/mean_terminated_length": 569.0390625, "completions/min_length": 264.0, "completions/min_terminated_length": 264.0, "epoch": 0.112, "grad_norm": 0.01068261917680502, "learning_rate": 2.6388888888888893e-06, "loss": 0.0117, "num_tokens": 23291220.0, "reward": 1.679351806640625, "reward_std": 0.4076327979564667, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6891355514526367, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8798341751098633, "step": 105 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47643656945982527, "calib/avg_num_step_conf": 6.28125, "calib/ece": 0.1744313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": -0.006450719822812756, "calib/mean_conf": 0.6525490196078432, "calib/mu_c": 0.6492857142857142, "calib/mu_w": 0.655736434108527, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16643137254901963, "calib/std_conf": 0.12793753281566905, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 496.4296875, "completions/mean_terminated_length": 498.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.11306666666666666, "grad_norm": 0.011055233888328075, "learning_rate": 2.6111111111111113e-06, "loss": 0.0335, "num_tokens": 23522890.0, "reward": 1.6351900100708008, "reward_std": 0.38616669178009033, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7025859355926514, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8928613662719727, "step": 106 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5764588776906806, "calib/avg_num_step_conf": 6.27734375, "calib/ece": 0.0989411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": 0.03592571855169835, "calib/mean_conf": 0.6518823529411766, "calib/mu_c": 0.6679432624113476, "calib/mu_w": 0.6320175438596493, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0989411764705883, "calib/std_conf": 0.12448003967229689, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 485.84375, "completions/mean_terminated_length": 487.7490539550781, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.11413333333333334, "grad_norm": 0.01080196350812912, "learning_rate": 2.5833333333333337e-06, "loss": -0.02, "num_tokens": 23751882.0, "reward": 1.7295396327972412, "reward_std": 0.3548358678817749, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7417683601379395, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8873279094696045, "step": 107 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5119723183391003, "calib/avg_num_step_conf": 6.49609375, "calib/ece": 0.09463529411764712, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 4.705882352940005e-05, "calib/mean_conf": 0.6680313725490197, "calib/mu_c": 0.6680470588235294, "calib/mu_w": 0.668, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04800000000000009, "calib/std_conf": 0.13613137036023448, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1659.0, "completions/max_terminated_length": 1659.0, "completions/mean_length": 530.328125, "completions/mean_terminated_length": 532.4078979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.1152, "grad_norm": 0.01080621313303709, "learning_rate": 2.5555555555555557e-06, "loss": -0.0048, "num_tokens": 23990878.0, "reward": 1.9048094749450684, "reward_std": 0.45833563804626465, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7562992572784424, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8863762617111206, "step": 108 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5950152847963067, "calib/avg_num_step_conf": 6.70703125, "calib/ece": 0.12139763779527565, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03937007874015748, "calib/gap": 0.04088059142803657, "calib/mean_conf": 0.660767716535433, "calib/mu_c": 0.6795985401459853, "calib/mu_w": 0.6387179487179487, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12139763779527565, "calib/std_conf": 0.1292966328747139, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1938.0, "completions/max_terminated_length": 1938.0, "completions/mean_length": 520.4609375, "completions/mean_terminated_length": 524.55908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.11626666666666667, "grad_norm": 0.010906565003097057, "learning_rate": 2.5277777777777778e-06, "loss": 0.0007, "num_tokens": 24228716.0, "reward": 1.7045462131500244, "reward_std": 0.27251356840133667, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7346241474151611, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8882478475570679, "step": 109 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48100837232208815, "calib/avg_num_step_conf": 6.04296875, "calib/ece": 0.1586596078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.0097410859394238, "calib/mean_conf": 0.6281639215686274, "calib/mu_c": 0.6231596774193549, "calib/mu_w": 0.6329007633587787, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1502745098039216, "calib/std_conf": 0.11000198907432343, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1159.0, "completions/max_terminated_length": 1159.0, "completions/mean_length": 506.15234375, "completions/mean_terminated_length": 508.1372985839844, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.11733333333333333, "grad_norm": 0.011566469445824623, "learning_rate": 2.5e-06, "loss": 0.0107, "num_tokens": 24463211.0, "reward": 1.6272175312042236, "reward_std": 0.4574228525161743, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7103029489517212, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9001297950744629, "step": 110 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5219117279319829, "calib/avg_num_step_conf": 6.13671875, "calib/ece": 0.1483003952569171, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.014963115778944824, "calib/mean_conf": 0.6384189723320157, "calib/mu_c": 0.6460483870967743, "calib/mu_w": 0.6310852713178294, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1483003952569171, "calib/std_conf": 0.11171971543964937, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2605.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 543.1953125, "completions/mean_terminated_length": 543.1953125, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 0.1184, "grad_norm": 0.010652108117938042, "learning_rate": 2.4722222222222226e-06, "loss": 0.0414, "num_tokens": 24709677.0, "reward": 1.6171307563781738, "reward_std": 0.41484975814819336, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7135441303253174, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8799788951873779, "step": 111 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.651970756516211, "calib/avg_num_step_conf": 6.03125, "calib/ece": 0.11434262948207179, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.05719707565162102, "calib/mean_conf": 0.6278884462151394, "calib/mu_c": 0.6554615384615384, "calib/mu_w": 0.5982644628099174, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11215139442231084, "calib/std_conf": 0.12040735766082009, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 551.93359375, "completions/mean_terminated_length": 556.279541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.11946666666666667, "grad_norm": 0.012287004850804806, "learning_rate": 2.4444444444444447e-06, "loss": 0.0056, "num_tokens": 24958892.0, "reward": 1.6569784879684448, "reward_std": 0.40331631898880005, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7376008033752441, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8825008869171143, "step": 112 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5629016312407317, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.07247058823529404, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.023644957983193216, "calib/mean_conf": 0.6058039215686274, "calib/mu_c": 0.6168382352941176, "calib/mu_w": 0.5931932773109244, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07247058823529404, "calib/std_conf": 0.105149943285085, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2071.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 465.55859375, "completions/mean_terminated_length": 465.55859375, "completions/min_length": 244.0, "completions/min_terminated_length": 244.0, "epoch": 0.12053333333333334, "grad_norm": 0.012438077479600906, "learning_rate": 2.4166666666666667e-06, "loss": 0.0378, "num_tokens": 25183275.0, "reward": 1.7068159580230713, "reward_std": 0.45404136180877686, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7436562776565552, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9039199948310852, "step": 113 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6661144977471509, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.062289682539682506, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": 0.060761595547309866, "calib/mean_conf": 0.6243769841269841, "calib/mu_c": 0.6480064935064935, "calib/mu_w": 0.5872448979591837, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03777777777777778, "calib/std_conf": 0.11961116212624232, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 473.33203125, "completions/mean_terminated_length": 477.0590515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.1216, "grad_norm": 0.011878692544996738, "learning_rate": 2.388888888888889e-06, "loss": -0.0168, "num_tokens": 25409472.0, "reward": 1.810877799987793, "reward_std": 0.4106457233428955, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7618821859359741, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8878784775733948, "step": 114 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6275689223057644, "calib/avg_num_step_conf": 5.671875, "calib/ece": 0.09525691699604741, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.04418358395989985, "calib/mean_conf": 0.6158102766798419, "calib/mu_c": 0.6367669172932331, "calib/mu_w": 0.5925833333333332, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0926877470355731, "calib/std_conf": 0.09809970916301491, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2786.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 480.609375, "completions/mean_terminated_length": 482.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.12266666666666666, "grad_norm": 0.011466645635664463, "learning_rate": 2.361111111111111e-06, "loss": 0.0137, "num_tokens": 25637772.0, "reward": 1.6898902654647827, "reward_std": 0.4999661445617676, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7461015582084656, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8962720632553101, "step": 115 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5467625899280576, "calib/avg_num_step_conf": 6.3046875, "calib/ece": 0.068046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.015604746971653372, "calib/mean_conf": 0.603515625, "calib/mu_c": 0.6106474820143885, "calib/mu_w": 0.5950427350427351, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06429687499999999, "calib/std_conf": 0.09694626285143422, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 545.19140625, "completions/mean_terminated_length": 547.3294677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.12373333333333333, "grad_norm": 0.010517150163650513, "learning_rate": 2.3333333333333336e-06, "loss": 0.0064, "num_tokens": 25881861.0, "reward": 1.7288788557052612, "reward_std": 0.48391813039779663, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7465265393257141, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9111762642860413, "step": 116 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5255786752031519, "calib/avg_num_step_conf": 6.04296875, "calib/ece": 0.09160784313725492, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.002237749322826943, "calib/mean_conf": 0.592156862745098, "calib/mu_c": 0.5910687022900764, "calib/mu_w": 0.5933064516129033, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08501960784313728, "calib/std_conf": 0.0972305472351556, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2147.0, "completions/max_terminated_length": 2147.0, "completions/mean_length": 508.953125, "completions/mean_terminated_length": 508.953125, "completions/min_length": 208.0, "completions/min_terminated_length": 208.0, "epoch": 0.1248, "grad_norm": 0.010903061367571354, "learning_rate": 2.305555555555556e-06, "loss": 0.0075, "num_tokens": 26118753.0, "reward": 1.6741626262664795, "reward_std": 0.32085680961608887, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7305999994277954, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9035505652427673, "step": 117 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4932255244755245, "calib/avg_num_step_conf": 5.91015625, "calib/ece": 0.1524313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.002771603396603428, "calib/mean_conf": 0.5936078431372549, "calib/mu_c": 0.5948251748251748, "calib/mu_w": 0.5920535714285714, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09262745098039213, "calib/std_conf": 0.10274431970966981, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 492.77734375, "completions/mean_terminated_length": 494.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.12586666666666665, "grad_norm": 0.011059942655265331, "learning_rate": 2.277777777777778e-06, "loss": -0.0104, "num_tokens": 26348912.0, "reward": 1.7454500198364258, "reward_std": 0.23404794931411743, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7382664084434509, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9075959920883179, "step": 118 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6213670267489713, "calib/avg_num_step_conf": 5.828125, "calib/ece": 0.0653571428571429, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03456018518518511, "calib/mean_conf": 0.5662301587301587, "calib/mu_c": 0.5810416666666667, "calib/mu_w": 0.5464814814814816, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030079365079365098, "calib/std_conf": 0.08284903680776745, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 529.625, "completions/mean_terminated_length": 535.9051513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.12693333333333334, "grad_norm": 0.01092610228806734, "learning_rate": 2.25e-06, "loss": -0.0133, "num_tokens": 26589560.0, "reward": 1.7466264963150024, "reward_std": 0.3065892457962036, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7506831884384155, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8998852968215942, "step": 119 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6439732866498088, "calib/avg_num_step_conf": 6.0234375, "calib/ece": 0.059648437499999894, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03892303702262845, "calib/mean_conf": 0.5687109374999999, "calib/mu_c": 0.5834591194968552, "calib/mu_w": 0.5445360824742268, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0036328125000000006, "calib/std_conf": 0.08146784760180603, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 487.7265625, "completions/mean_terminated_length": 489.6392517089844, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.128, "grad_norm": 0.012312943115830421, "learning_rate": 2.222222222222222e-06, "loss": 0.0096, "num_tokens": 26821106.0, "reward": 1.8510717153549194, "reward_std": 0.36764204502105713, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7707937955856323, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9147434830665588, "step": 120 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5252698412698412, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.09816733067729083, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.0029422222222222905, "calib/mean_conf": 0.5907569721115538, "calib/mu_c": 0.58928, "calib/mu_w": 0.5922222222222223, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0954581673306773, "calib/std_conf": 0.10277341679828322, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 559.203125, "completions/mean_terminated_length": 568.0794067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.12906666666666666, "grad_norm": 0.009793057106435299, "learning_rate": 2.1944444444444445e-06, "loss": -0.0068, "num_tokens": 27069318.0, "reward": 1.6255204677581787, "reward_std": 0.49173569679260254, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.715122640132904, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8963341116905212, "step": 121 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6879084967320261, "calib/avg_num_step_conf": 6.08203125, "calib/ece": 0.08913725490196082, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05862745098039224, "calib/mean_conf": 0.5873333333333333, "calib/mu_c": 0.6107843137254902, "calib/mu_w": 0.552156862745098, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.038235294117647076, "calib/std_conf": 0.09221429999833543, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2373.0, "completions/max_terminated_length": 2373.0, "completions/mean_length": 495.89453125, "completions/mean_terminated_length": 495.89453125, "completions/min_length": 239.0, "completions/min_terminated_length": 239.0, "epoch": 0.13013333333333332, "grad_norm": 0.012019730173051357, "learning_rate": 2.166666666666667e-06, "loss": 0.0493, "num_tokens": 27303611.0, "reward": 1.8140134811401367, "reward_std": 0.3575590252876282, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7735027074813843, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9122384190559387, "step": 122 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5302514653053507, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.09357142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": -0.0023255813953487747, "calib/mean_conf": 0.6054761904761905, "calib/mu_c": 0.6043410852713179, "calib/mu_w": 0.6066666666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09357142857142857, "calib/std_conf": 0.10294941848519548, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2533.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 557.578125, "completions/mean_terminated_length": 564.1897583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.1312, "grad_norm": 0.01179084088653326, "learning_rate": 2.138888888888889e-06, "loss": 0.0182, "num_tokens": 27551639.0, "reward": 1.650080680847168, "reward_std": 0.5606234669685364, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7182250022888184, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.889910101890564, "step": 123 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5170613700025465, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.06238281249999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.0016768525592053551, "calib/mean_conf": 0.6077734375, "calib/mu_c": 0.6084415584415583, "calib/mu_w": 0.606764705882353, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03429687499999999, "calib/std_conf": 0.10597816069565273, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 508.66015625, "completions/mean_terminated_length": 510.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.13226666666666667, "grad_norm": 0.011192714795470238, "learning_rate": 2.1111111111111114e-06, "loss": 0.0022, "num_tokens": 27788672.0, "reward": 1.8174453973770142, "reward_std": 0.3154680132865906, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7498488426208496, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9105578660964966, "step": 124 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4941504004004004, "calib/avg_num_step_conf": 6.1796875, "calib/ece": 0.10752941176470596, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.0013475975975975052, "calib/mean_conf": 0.6323921568627451, "calib/mu_c": 0.6318055555555556, "calib/mu_w": 0.6331531531531531, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.087607843137255, "calib/std_conf": 0.104225261028814, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2864.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 517.63671875, "completions/mean_terminated_length": 517.63671875, "completions/min_length": 246.0, "completions/min_terminated_length": 246.0, "epoch": 0.13333333333333333, "grad_norm": 0.010911756195127964, "learning_rate": 2.0833333333333334e-06, "loss": 0.0293, "num_tokens": 28025995.0, "reward": 1.7478008270263672, "reward_std": 0.4162144064903259, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7334879040718079, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8983402252197266, "step": 125 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6463255813953488, "calib/avg_num_step_conf": 5.625, "calib/ece": 0.13480314960629927, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.04857674418604663, "calib/mean_conf": 0.6269291338582678, "calib/mu_c": 0.6516000000000001, "calib/mu_w": 0.6030232558139534, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13480314960629927, "calib/std_conf": 0.10181434246863584, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2426.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 532.0390625, "completions/mean_terminated_length": 534.1255493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1344, "grad_norm": 0.010709105059504509, "learning_rate": 2.0555555555555555e-06, "loss": -0.0286, "num_tokens": 28267661.0, "reward": 1.6336421966552734, "reward_std": 0.3790965676307678, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7370499968528748, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8912684917449951, "step": 126 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6661931818181818, "calib/avg_num_step_conf": 5.73046875, "calib/ece": 0.15080321285140563, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.024096385542168676, "calib/gap": 0.062597494834711, "calib/mean_conf": 0.6367469879518073, "calib/mu_c": 0.6689256198347109, "calib/mu_w": 0.6063281249999999, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15080321285140563, "calib/std_conf": 0.10699369534856351, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2885.0, "completions/max_terminated_length": 2885.0, "completions/mean_length": 502.5234375, "completions/mean_terminated_length": 510.5000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.13546666666666668, "grad_norm": 0.0120786027982831, "learning_rate": 2.027777777777778e-06, "loss": 0.012, "num_tokens": 28499979.0, "reward": 1.597495436668396, "reward_std": 0.394074410200119, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.726848840713501, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.881882905960083, "step": 127 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.610711209548419, "calib/avg_num_step_conf": 5.61328125, "calib/ece": 0.14647058823529419, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.03744001476559622, "calib/mean_conf": 0.6523529411764707, "calib/mu_c": 0.6708527131782945, "calib/mu_w": 0.6334126984126983, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14647058823529419, "calib/std_conf": 0.09856197881444989, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2792.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 521.640625, "completions/mean_terminated_length": 521.640625, "completions/min_length": 230.0, "completions/min_terminated_length": 230.0, "epoch": 0.13653333333333334, "grad_norm": 0.012539071962237358, "learning_rate": 2.0000000000000003e-06, "loss": 0.0146, "num_tokens": 28740183.0, "reward": 1.661848545074463, "reward_std": 0.31563234329223633, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7347027063369751, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8970666527748108, "step": 128 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5719571825029734, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.05937254901960794, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": 0.02695189639222928, "calib/mean_conf": 0.6818039215686275, "calib/mu_c": 0.6917391304347825, "calib/mu_w": 0.6647872340425532, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.054901960784313794, "calib/std_conf": 0.11230159316660897, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2448.0, "completions/max_terminated_length": 2448.0, "completions/mean_length": 465.0625, "completions/mean_terminated_length": 465.0625, "completions/min_length": 218.0, "completions/min_terminated_length": 218.0, "epoch": 0.1376, "grad_norm": 0.012660709209740162, "learning_rate": 1.9722222222222224e-06, "loss": 0.0116, "num_tokens": 28961623.0, "reward": 1.8541687726974487, "reward_std": 0.4114261865615845, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.761662483215332, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8893877267837524, "step": 129 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6036775724275724, "calib/avg_num_step_conf": 5.6171875, "calib/ece": 0.12439215686274516, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.046009615384615454, "calib/mean_conf": 0.6851764705882354, "calib/mu_c": 0.7053846153846154, "calib/mu_w": 0.6593749999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12439215686274516, "calib/std_conf": 0.115445718656738, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 465.10546875, "completions/mean_terminated_length": 465.10546875, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 0.13866666666666666, "grad_norm": 0.011984642595052719, "learning_rate": 1.944444444444445e-06, "loss": 0.0308, "num_tokens": 29185978.0, "reward": 1.7440496683120728, "reward_std": 0.3274148404598236, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7446382641792297, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8878102898597717, "step": 130 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5754952118588482, "calib/avg_num_step_conf": 5.4140625, "calib/ece": 0.30043478260869566, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.02551948051948072, "calib/mean_conf": 0.6917391304347826, "calib/mu_c": 0.7072727272727274, "calib/mu_w": 0.6817532467532467, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30043478260869566, "calib/std_conf": 0.11124936883257819, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2219.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 462.59765625, "completions/mean_terminated_length": 466.2401428222656, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.13973333333333332, "grad_norm": 0.012838898226618767, "learning_rate": 1.916666666666667e-06, "loss": -0.0023, "num_tokens": 29410611.0, "reward": 1.4626030921936035, "reward_std": 0.3501003384590149, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6607421636581421, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8771703243255615, "step": 131 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5402930402930404, "calib/avg_num_step_conf": 6.671875, "calib/ece": 0.14232283464566922, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.06692913385826772, "calib/gap": 0.011493982208267695, "calib/mean_conf": 0.7383858267716535, "calib/mu_c": 0.7428205128205128, "calib/mu_w": 0.7313265306122451, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.133267716535433, "calib/std_conf": 0.11589158300635134, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 514.578125, "completions/mean_terminated_length": 514.578125, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 0.1408, "grad_norm": 0.012298737652599812, "learning_rate": 1.888888888888889e-06, "loss": 0.0328, "num_tokens": 29647935.0, "reward": 1.8090349435806274, "reward_std": 0.4609593152999878, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7315890789031982, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8717383146286011, "step": 132 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6335258152173914, "calib/avg_num_step_conf": 6.1171875, "calib/ece": 0.3766269841269841, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.05637499999999995, "calib/mean_conf": 0.7417063492063491, "calib/mu_c": 0.7775, "calib/mu_w": 0.721125, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3766269841269841, "calib/std_conf": 0.11357058971660076, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1853.0, "completions/max_terminated_length": 1853.0, "completions/mean_length": 543.19140625, "completions/mean_terminated_length": 547.468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 258.0, "epoch": 0.14186666666666667, "grad_norm": 0.011609735898673534, "learning_rate": 1.8611111111111113e-06, "loss": 0.0158, "num_tokens": 29893336.0, "reward": 1.4053130149841309, "reward_std": 0.4048590064048767, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.62959885597229, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8666534423828125, "step": 133 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5933084344849051, "calib/avg_num_step_conf": 5.859375, "calib/ece": 0.1909350393700788, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08267716535433071, "calib/gap": 0.04812807345160297, "calib/mean_conf": 0.7146555118110235, "calib/mu_c": 0.7372037037037038, "calib/mu_w": 0.6890756302521008, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18704724409448822, "calib/std_conf": 0.1408914406916585, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2487.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 555.2734375, "completions/mean_terminated_length": 555.2734375, "completions/min_length": 207.0, "completions/min_terminated_length": 207.0, "epoch": 0.14293333333333333, "grad_norm": 0.012096771970391273, "learning_rate": 1.8333333333333333e-06, "loss": 0.0467, "num_tokens": 30144438.0, "reward": 1.6862773895263672, "reward_std": 0.514042854309082, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7159255743026733, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8807463645935059, "step": 134 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5513161146811667, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.18443137254901962, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.06666666666666667, "calib/gap": 0.01707983193277285, "calib/mean_conf": 0.717764705882353, "calib/mu_c": 0.7257352941176469, "calib/mu_w": 0.7086554621848741, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18443137254901962, "calib/std_conf": 0.11799012724323284, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1626.0, "completions/max_terminated_length": 1626.0, "completions/mean_length": 519.21484375, "completions/mean_terminated_length": 521.2510375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.144, "grad_norm": 0.011714962311089039, "learning_rate": 1.8055555555555557e-06, "loss": -0.0191, "num_tokens": 30383237.0, "reward": 1.6944434642791748, "reward_std": 0.5428236722946167, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7088964581489563, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8891900181770325, "step": 135 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5992043912668065, "calib/avg_num_step_conf": 6.1171875, "calib/ece": 0.219843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.04109966695448375, "calib/mean_conf": 0.6943529411764706, "calib/mu_c": 0.7159504132231405, "calib/mu_w": 0.6748507462686567, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.219843137254902, "calib/std_conf": 0.11744115314545321, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 476.47265625, "completions/mean_terminated_length": 478.3412170410156, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.14506666666666668, "grad_norm": 0.011427158489823341, "learning_rate": 1.777777777777778e-06, "loss": 0.0088, "num_tokens": 30613702.0, "reward": 1.602003812789917, "reward_std": 0.36013883352279663, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7039972543716431, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8837054371833801, "step": 136 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.562200956937799, "calib/avg_num_step_conf": 6.2578125, "calib/ece": 0.22330708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.051181102362204724, "calib/gap": 0.018671472068601314, "calib/mean_conf": 0.7428346456692912, "calib/mu_c": 0.7517293233082707, "calib/mu_w": 0.7330578512396694, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22125984251968503, "calib/std_conf": 0.10044701378938628, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 501.15234375, "completions/mean_terminated_length": 503.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 258.0, "epoch": 0.14613333333333334, "grad_norm": 0.011493817903101444, "learning_rate": 1.75e-06, "loss": 0.0039, "num_tokens": 30848981.0, "reward": 1.6694798469543457, "reward_std": 0.41539496183395386, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.696246862411499, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8801102042198181, "step": 137 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6077588910493634, "calib/avg_num_step_conf": 6.10546875, "calib/ece": 0.13539062500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": 0.04363545129523916, "calib/mean_conf": 0.7125, "calib/mu_c": 0.730738255033557, "calib/mu_w": 0.6871028037383179, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1329296875, "calib/std_conf": 0.1132095678377053, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 502.73828125, "completions/mean_terminated_length": 504.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.1472, "grad_norm": 0.012307239696383476, "learning_rate": 1.7222222222222224e-06, "loss": -0.0015, "num_tokens": 31082018.0, "reward": 1.7836527824401855, "reward_std": 0.44293880462646484, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7481210827827454, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8943023681640625, "step": 138 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6541900868676545, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.10200000000000006, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.06101686254471117, "calib/mean_conf": 0.6921960784313725, "calib/mu_c": 0.7168421052631578, "calib/mu_w": 0.6558252427184467, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09905882352941184, "calib/std_conf": 0.1281293800181558, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2699.0, "completions/max_terminated_length": 2699.0, "completions/mean_length": 503.67578125, "completions/mean_terminated_length": 503.67578125, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.14826666666666666, "grad_norm": 0.011360560543835163, "learning_rate": 1.6944444444444446e-06, "loss": 0.0123, "num_tokens": 31314055.0, "reward": 1.8030741214752197, "reward_std": 0.4602932929992676, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7599769830703735, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8976321816444397, "step": 139 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6284209085933223, "calib/avg_num_step_conf": 6.7421875, "calib/ece": 0.06776470588235292, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07450980392156863, "calib/gap": 0.054493021346469606, "calib/mean_conf": 0.7167058823529413, "calib/mu_c": 0.735297619047619, "calib/mu_w": 0.6808045977011494, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06282352941176467, "calib/std_conf": 0.12182972593854369, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 976.0, "completions/max_terminated_length": 976.0, "completions/mean_length": 501.578125, "completions/mean_terminated_length": 503.5451354980469, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.14933333333333335, "grad_norm": 0.012145115062594414, "learning_rate": 1.6666666666666667e-06, "loss": -0.022, "num_tokens": 31547475.0, "reward": 1.9001531600952148, "reward_std": 0.39148375391960144, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7784765362739563, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8924486637115479, "step": 140 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6302139891408496, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.11343750000000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": 0.04672628553177893, "calib/mean_conf": 0.678984375, "calib/mu_c": 0.6974193548387096, "calib/mu_w": 0.6506930693069307, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09347656250000004, "calib/std_conf": 0.11790382629863787, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 512.33203125, "completions/mean_terminated_length": 514.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 256.0, "epoch": 0.1504, "grad_norm": 0.011024919338524342, "learning_rate": 1.638888888888889e-06, "loss": -0.0074, "num_tokens": 31785728.0, "reward": 1.826422095298767, "reward_std": 0.45660847425460815, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7641414403915405, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9087342023849487, "step": 141 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6165994623655914, "calib/avg_num_step_conf": 6.546875, "calib/ece": 0.17617187500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.04296875, "calib/gap": 0.0524389051808406, "calib/mean_conf": 0.691796875, "calib/mu_c": 0.7171969696969696, "calib/mu_w": 0.664758064516129, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17617187500000006, "calib/std_conf": 0.12313885451080975, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 525.8671875, "completions/mean_terminated_length": 527.929443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.15146666666666667, "grad_norm": 0.010916810482740402, "learning_rate": 1.6111111111111113e-06, "loss": 0.0101, "num_tokens": 32025510.0, "reward": 1.6825264692306519, "reward_std": 0.3293830454349518, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7302383184432983, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9061173796653748, "step": 142 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.552674280721463, "calib/avg_num_step_conf": 6.80078125, "calib/ece": 0.08445312500000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.02032827810023108, "calib/mean_conf": 0.65671875, "calib/mu_c": 0.665374149659864, "calib/mu_w": 0.6450458715596329, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08347656250000007, "calib/std_conf": 0.11319997084115128, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 544.05078125, "completions/mean_terminated_length": 546.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 258.0, "epoch": 0.15253333333333333, "grad_norm": 0.011365286074578762, "learning_rate": 1.5833333333333333e-06, "loss": 0.0016, "num_tokens": 32272123.0, "reward": 1.7716703414916992, "reward_std": 0.31434881687164307, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7426109313964844, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.906570315361023, "step": 143 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5436744988716316, "calib/avg_num_step_conf": 6.4921875, "calib/ece": 0.09929411764705885, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.01858622062923143, "calib/mean_conf": 0.6730980392156863, "calib/mu_c": 0.6798765432098766, "calib/mu_w": 0.6612903225806451, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0685490196078432, "calib/std_conf": 0.12247212646500918, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 503.2265625, "completions/mean_terminated_length": 505.2000427246094, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.1536, "grad_norm": 0.014862559735774994, "learning_rate": 1.5555555555555558e-06, "loss": 0.0214, "num_tokens": 32505077.0, "reward": 1.8636364936828613, "reward_std": 0.3292680084705353, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.75751793384552, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9079656004905701, "step": 144 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5333991339786042, "calib/avg_num_step_conf": 7.046875, "calib/ece": 0.12090196078431377, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.01643912379011714, "calib/mean_conf": 0.6881960784313725, "calib/mu_c": 0.6949006622516556, "calib/mu_w": 0.6784615384615384, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10847058823529417, "calib/std_conf": 0.12198955590265578, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 483.30078125, "completions/mean_terminated_length": 485.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.15466666666666667, "grad_norm": 0.01164257898926735, "learning_rate": 1.527777777777778e-06, "loss": -0.003, "num_tokens": 32731506.0, "reward": 1.7948551177978516, "reward_std": 0.42932164669036865, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7394285202026367, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9087417125701904, "step": 145 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6459713755632122, "calib/avg_num_step_conf": 6.359375, "calib/ece": 0.26928571428571424, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.05508348794063078, "calib/mean_conf": 0.6581746031746032, "calib/mu_c": 0.6918367346938775, "calib/mu_w": 0.6367532467532467, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26928571428571424, "calib/std_conf": 0.11439211867658855, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 526.6796875, "completions/mean_terminated_length": 528.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.15573333333333333, "grad_norm": 0.011714852415025234, "learning_rate": 1.5e-06, "loss": 0.01, "num_tokens": 32973552.0, "reward": 1.4650468826293945, "reward_std": 0.42327678203582764, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6919437646865845, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9026187658309937, "step": 146 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6474015453639083, "calib/avg_num_step_conf": 6.65234375, "calib/ece": 0.2271259842519685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.056592721834496706, "calib/mean_conf": 0.676732283464567, "calib/mu_c": 0.7070338983050849, "calib/mu_w": 0.6504411764705882, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2196456692913386, "calib/std_conf": 0.13867465742276497, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 510.58203125, "completions/mean_terminated_length": 512.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.1568, "grad_norm": 0.011596626602113247, "learning_rate": 1.4722222222222225e-06, "loss": 0.0021, "num_tokens": 33207941.0, "reward": 1.5923233032226562, "reward_std": 0.3285621702671051, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.7095776796340942, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9097157716751099, "step": 147 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6376507936507938, "calib/avg_num_step_conf": 6.57421875, "calib/ece": 0.0972156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": 0.06232380952380956, "calib/mean_conf": 0.678470588235294, "calib/mu_c": 0.7041333333333334, "calib/mu_w": 0.6418095238095238, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09372549019607843, "calib/std_conf": 0.12719451723002634, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2408.0, "completions/max_terminated_length": 2408.0, "completions/mean_length": 486.48828125, "completions/mean_terminated_length": 486.48828125, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.15786666666666666, "grad_norm": 0.012608281336724758, "learning_rate": 1.4444444444444445e-06, "loss": 0.0174, "num_tokens": 33437594.0, "reward": 1.7900137901306152, "reward_std": 0.3942890465259552, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7568058371543884, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.903249204158783, "step": 148 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6321003134796238, "calib/avg_num_step_conf": 7.16796875, "calib/ece": 0.24623529411764708, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.059203761755485806, "calib/mean_conf": 0.677607843137255, "calib/mu_c": 0.7112727272727273, "calib/mu_w": 0.6520689655172415, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24623529411764708, "calib/std_conf": 0.13426109746097853, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 588.4765625, "completions/mean_terminated_length": 588.4765625, "completions/min_length": 283.0, "completions/min_terminated_length": 283.0, "epoch": 0.15893333333333334, "grad_norm": 0.010876238346099854, "learning_rate": 1.4166666666666667e-06, "loss": 0.0435, "num_tokens": 33692700.0, "reward": 1.5448906421661377, "reward_std": 0.3771207332611084, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7023417949676514, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9069079160690308, "step": 149 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.603125, "calib/avg_num_step_conf": 6.51953125, "calib/ece": 0.09409448818897644, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.043016414141414105, "calib/mean_conf": 0.661023622047244, "calib/mu_c": 0.6796527777777778, "calib/mu_w": 0.6366363636363637, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09409448818897644, "calib/std_conf": 0.11753595292250137, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2878.0, "completions/max_terminated_length": 2878.0, "completions/mean_length": 517.515625, "completions/mean_terminated_length": 517.515625, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 0.16, "grad_norm": 0.01054420880973339, "learning_rate": 1.3888888888888892e-06, "loss": 0.0181, "num_tokens": 33930144.0, "reward": 1.7520647048950195, "reward_std": 0.4901063144207001, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7453425526618958, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9113534688949585, "step": 150 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6249682499364999, "calib/avg_num_step_conf": 6.59765625, "calib/ece": 0.1635458167330677, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.058841757683515516, "calib/mean_conf": 0.6511952191235061, "calib/mu_c": 0.680967741935484, "calib/mu_w": 0.6221259842519685, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16035856573705176, "calib/std_conf": 0.1367645697791074, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2563.0, "completions/max_terminated_length": 2563.0, "completions/mean_length": 575.10546875, "completions/mean_terminated_length": 575.10546875, "completions/min_length": 261.0, "completions/min_terminated_length": 261.0, "epoch": 0.16106666666666666, "grad_norm": 0.011571124196052551, "learning_rate": 1.3611111111111112e-06, "loss": 0.0523, "num_tokens": 34184395.0, "reward": 1.6189985275268555, "reward_std": 0.4970731735229492, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7191691398620605, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8974502086639404, "step": 151 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5705645161290323, "calib/avg_num_step_conf": 6.83984375, "calib/ece": 0.14840000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.031455453149001555, "calib/mean_conf": 0.6358400000000001, "calib/mu_c": 0.6516935483870969, "calib/mu_w": 0.6202380952380954, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14412000000000003, "calib/std_conf": 0.1292358092790075, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2007.0, "completions/max_terminated_length": 2007.0, "completions/mean_length": 501.0078125, "completions/mean_terminated_length": 513.0320434570312, "completions/min_length": 0.0, "completions/min_terminated_length": 246.0, "epoch": 0.16213333333333332, "grad_norm": 0.01134491991251707, "learning_rate": 1.3333333333333334e-06, "loss": -0.0132, "num_tokens": 34418045.0, "reward": 1.6180343627929688, "reward_std": 0.458588182926178, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.712388277053833, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9003739953041077, "step": 152 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.591959215281363, "calib/avg_num_step_conf": 6.7578125, "calib/ece": 0.08079051383399205, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.03945211667527104, "calib/mean_conf": 0.6437154150197629, "calib/mu_c": 0.6599328859060403, "calib/mu_w": 0.6204807692307692, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06778656126482208, "calib/std_conf": 0.12695751849145287, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2129.0, "completions/max_terminated_length": 2129.0, "completions/mean_length": 523.40234375, "completions/mean_terminated_length": 527.5236206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.1632, "grad_norm": 0.011443248949944973, "learning_rate": 1.3055555555555556e-06, "loss": -0.0423, "num_tokens": 34659356.0, "reward": 1.7793679237365723, "reward_std": 0.4185265600681305, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7461199760437012, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9104143381118774, "step": 153 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5889194980104072, "calib/avg_num_step_conf": 5.8046875, "calib/ece": 0.161953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.029592898683807545, "calib/mean_conf": 0.62546875, "calib/mu_c": 0.6410743801652891, "calib/mu_w": 0.6114814814814815, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1573828125, "calib/std_conf": 0.1136734589666273, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 478.8125, "completions/mean_terminated_length": 480.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.16426666666666667, "grad_norm": 0.012007861398160458, "learning_rate": 1.2777777777777779e-06, "loss": 0.0362, "num_tokens": 34886372.0, "reward": 1.6205209493637085, "reward_std": 0.3650030791759491, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7261112928390503, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9278475046157837, "step": 154 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6128, "calib/avg_num_step_conf": 5.8984375, "calib/ece": 0.1398431372549019, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.057043076923077085, "calib/mean_conf": 0.6180392156862746, "calib/mu_c": 0.64712, "calib/mu_w": 0.5900769230769229, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13384313725490193, "calib/std_conf": 0.14083810212847075, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 467.5, "completions/mean_terminated_length": 469.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 241.0, "epoch": 0.16533333333333333, "grad_norm": 0.012868484482169151, "learning_rate": 1.25e-06, "loss": 0.0052, "num_tokens": 35113268.0, "reward": 1.648886799812317, "reward_std": 0.3540608584880829, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7395272850990295, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9341446757316589, "step": 155 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6274998402657976, "calib/avg_num_step_conf": 6.26953125, "calib/ece": 0.05103174603174597, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": 0.07234809277362453, "calib/mean_conf": 0.6077777777777776, "calib/mu_c": 0.6396453900709219, "calib/mu_w": 0.5672972972972974, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.049642857142857086, "calib/std_conf": 0.14023443460516388, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 542.72265625, "completions/mean_terminated_length": 544.8510131835938, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.1664, "grad_norm": 0.010311256162822247, "learning_rate": 1.2222222222222223e-06, "loss": 0.058, "num_tokens": 35356965.0, "reward": 1.7402732372283936, "reward_std": 0.4105079770088196, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7552226185798645, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9089950919151306, "step": 156 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6612486612486611, "calib/avg_num_step_conf": 6.85546875, "calib/ece": 0.08480314960629923, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.08308196308196314, "calib/mean_conf": 0.6142519685039369, "calib/mu_c": 0.6505594405594406, "calib/mu_w": 0.5674774774774775, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06803149606299214, "calib/std_conf": 0.15597051853990004, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2884.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 515.26171875, "completions/mean_terminated_length": 515.26171875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.16746666666666668, "grad_norm": 0.010643880814313889, "learning_rate": 1.1944444444444446e-06, "loss": 0.0583, "num_tokens": 35592600.0, "reward": 1.7511237859725952, "reward_std": 0.4374259114265442, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7581789493560791, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9181913733482361, "step": 157 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5130963213310802, "calib/avg_num_step_conf": 5.84375, "calib/ece": 0.1289019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.0014753672169505183, "calib/mean_conf": 0.6472549019607844, "calib/mu_c": 0.6466878980891719, "calib/mu_w": 0.6481632653061224, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08023529411764703, "calib/std_conf": 0.13824500035455026, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 468.140625, "completions/mean_terminated_length": 468.140625, "completions/min_length": 232.0, "completions/min_terminated_length": 232.0, "epoch": 0.16853333333333334, "grad_norm": 0.012019583955407143, "learning_rate": 1.1666666666666668e-06, "loss": 0.039, "num_tokens": 35817684.0, "reward": 1.8365700244903564, "reward_std": 0.4772275984287262, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7396761775016785, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9347293376922607, "step": 158 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5564952213279678, "calib/avg_num_step_conf": 5.64453125, "calib/ece": 0.12019685039370082, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.02273390342052295, "calib/mean_conf": 0.6061023622047244, "calib/mu_c": 0.6161267605633801, "calib/mu_w": 0.5933928571428572, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08362204724409446, "calib/std_conf": 0.1555387049405307, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1125.0, "completions/max_terminated_length": 1125.0, "completions/mean_length": 447.15625, "completions/mean_terminated_length": 448.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.1696, "grad_norm": 0.01172886323183775, "learning_rate": 1.138888888888889e-06, "loss": -0.0039, "num_tokens": 36036940.0, "reward": 1.7441353797912598, "reward_std": 0.3740137219429016, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7325222492218018, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9315195679664612, "step": 159 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5577742946708463, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.09682352941176474, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.027965517241379412, "calib/mean_conf": 0.6369019607843138, "calib/mu_c": 0.6489655172413793, "calib/mu_w": 0.6209999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08254901960784318, "calib/std_conf": 0.1524833973478541, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2676.0, "completions/max_terminated_length": 2676.0, "completions/mean_length": 498.484375, "completions/mean_terminated_length": 498.484375, "completions/min_length": 198.0, "completions/min_terminated_length": 198.0, "epoch": 0.17066666666666666, "grad_norm": 0.011179703287780285, "learning_rate": 1.111111111111111e-06, "loss": 0.0211, "num_tokens": 36269392.0, "reward": 1.7648649215698242, "reward_std": 0.3944992423057556, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7376238107681274, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9312103986740112, "step": 160 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6658963585434174, "calib/avg_num_step_conf": 5.90625, "calib/ece": 0.08980314960629919, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.0897212885154064, "calib/mean_conf": 0.6137401574803149, "calib/mu_c": 0.6434117647058825, "calib/mu_w": 0.5536904761904761, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017125984251968505, "calib/std_conf": 0.16111260367997945, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2777.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 477.1328125, "completions/mean_terminated_length": 477.1328125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.17173333333333332, "grad_norm": 0.012054560706019402, "learning_rate": 1.0833333333333335e-06, "loss": 0.0817, "num_tokens": 36495458.0, "reward": 1.9207186698913574, "reward_std": 0.40740329027175903, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7831676006317139, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9309569597244263, "step": 161 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5349786931818181, "calib/avg_num_step_conf": 5.734375, "calib/ece": 0.134921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.024159090909090852, "calib/mean_conf": 0.617734375, "calib/mu_c": 0.6252840909090909, "calib/mu_w": 0.601125, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03257812500000002, "calib/std_conf": 0.16187241717896034, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 450.125, "completions/mean_terminated_length": 451.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.1728, "grad_norm": 0.011963211931288242, "learning_rate": 1.0555555555555557e-06, "loss": 0.0336, "num_tokens": 36714834.0, "reward": 1.9583287239074707, "reward_std": 0.3349304795265198, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.764467179775238, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9438478946685791, "step": 162 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6884183768367537, "calib/avg_num_step_conf": 6.05078125, "calib/ece": 0.1347637795275591, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.023622047244094488, "calib/gap": 0.09440944881889746, "calib/mean_conf": 0.6204330708661416, "calib/mu_c": 0.6676377952755905, "calib/mu_w": 0.5732283464566931, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12759842519685038, "calib/std_conf": 0.15417359530410835, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2032.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 519.28515625, "completions/mean_terminated_length": 521.3215942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.17386666666666667, "grad_norm": 0.010253848508000374, "learning_rate": 1.0277777777777777e-06, "loss": 0.0155, "num_tokens": 36952603.0, "reward": 1.65916907787323, "reward_std": 0.3765425980091095, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7530019283294678, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9227369427680969, "step": 163 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6116176005064894, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.12186507936507934, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": 0.06605698005698024, "calib/mean_conf": 0.6238492063492063, "calib/mu_c": 0.6545185185185186, "calib/mu_w": 0.5884615384615384, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10499999999999998, "calib/std_conf": 0.16777448807424944, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 537.68359375, "completions/mean_terminated_length": 539.7921752929688, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.17493333333333333, "grad_norm": 0.010474850423634052, "learning_rate": 1.0000000000000002e-06, "loss": 0.0236, "num_tokens": 37196386.0, "reward": 1.6978435516357422, "reward_std": 0.41331610083580017, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7365285158157349, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9220330715179443, "step": 164 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5495690991381983, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.1635826771653543, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.03362204724409468, "calib/mean_conf": 0.6525590551181102, "calib/mu_c": 0.6693700787401576, "calib/mu_w": 0.635748031496063, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15807086614173227, "calib/std_conf": 0.1584426746749824, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 505.2109375, "completions/mean_terminated_length": 505.2109375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.176, "grad_norm": 0.010923215188086033, "learning_rate": 9.722222222222224e-07, "loss": 0.0323, "num_tokens": 37431296.0, "reward": 1.6498363018035889, "reward_std": 0.42734792828559875, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7128199338912964, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.925588071346283, "step": 165 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6211840888066605, "calib/avg_num_step_conf": 6.22265625, "calib/ece": 0.10615686274509803, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": 0.06878485529271827, "calib/mean_conf": 0.6639607843137255, "calib/mu_c": 0.6893167701863353, "calib/mu_w": 0.620531914893617, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06937254901960783, "calib/std_conf": 0.17315186135176522, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 499.42578125, "completions/mean_terminated_length": 499.42578125, "completions/min_length": 216.0, "completions/min_terminated_length": 216.0, "epoch": 0.17706666666666668, "grad_norm": 0.011256815865635872, "learning_rate": 9.444444444444445e-07, "loss": 0.0294, "num_tokens": 37665333.0, "reward": 1.859373688697815, "reward_std": 0.33840852975845337, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7575469017028809, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9299476146697998, "step": 166 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5554705710955712, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.12082677165354336, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.04326194638694636, "calib/mean_conf": 0.6717716535433071, "calib/mu_c": 0.6850568181818182, "calib/mu_w": 0.6417948717948718, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0498425196850394, "calib/std_conf": 0.173406158741239, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 500.79296875, "completions/mean_terminated_length": 502.75689697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.17813333333333334, "grad_norm": 0.011857429519295692, "learning_rate": 9.166666666666666e-07, "loss": 0.0083, "num_tokens": 37899144.0, "reward": 1.94930100440979, "reward_std": 0.2797129452228546, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.7690542936325073, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9187746644020081, "step": 167 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6973217677599088, "calib/avg_num_step_conf": 5.78515625, "calib/ece": 0.0985882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09019607843137255, "calib/gap": 0.1266328985690769, "calib/mean_conf": 0.6668235294117648, "calib/mu_c": 0.7194630872483222, "calib/mu_w": 0.5928301886792453, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09054901960784317, "calib/std_conf": 0.18775586003046227, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 497.48828125, "completions/mean_terminated_length": 497.48828125, "completions/min_length": 214.0, "completions/min_terminated_length": 214.0, "epoch": 0.1792, "grad_norm": 0.012170032598078251, "learning_rate": 8.88888888888889e-07, "loss": 0.0252, "num_tokens": 38131173.0, "reward": 1.7952685356140137, "reward_std": 0.471716046333313, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7702500224113464, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9342615604400635, "step": 168 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6149819269599901, "calib/avg_num_step_conf": 5.953125, "calib/ece": 0.17509803921568623, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08627450980392157, "calib/gap": 0.08630188208899436, "calib/mean_conf": 0.7182352941176472, "calib/mu_c": 0.7564788732394367, "calib/mu_w": 0.6701769911504424, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.168235294117647, "calib/std_conf": 0.18560030724231816, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2180.0, "completions/max_terminated_length": 2180.0, "completions/mean_length": 493.14453125, "completions/mean_terminated_length": 495.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.18026666666666666, "grad_norm": 0.01110941730439663, "learning_rate": 8.611111111111112e-07, "loss": -0.0118, "num_tokens": 38361602.0, "reward": 1.7430405616760254, "reward_std": 0.4263700544834137, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7285687327384949, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9310938119888306, "step": 169 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6479359250348233, "calib/avg_num_step_conf": 6.6796875, "calib/ece": 0.15203921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.08627450980392157, "calib/gap": 0.09772825123464624, "calib/mean_conf": 0.7312549019607842, "calib/mu_c": 0.7718791946308726, "calib/mu_w": 0.6741509433962264, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1494901960784314, "calib/std_conf": 0.1699919078970543, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 507.7734375, "completions/mean_terminated_length": 507.7734375, "completions/min_length": 231.0, "completions/min_terminated_length": 231.0, "epoch": 0.18133333333333335, "grad_norm": 0.010340871289372444, "learning_rate": 8.333333333333333e-07, "loss": 0.0158, "num_tokens": 38595744.0, "reward": 1.7924294471740723, "reward_std": 0.3177809715270996, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7511488199234009, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9341940879821777, "step": 170 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5383407243163341, "calib/avg_num_step_conf": 5.66796875, "calib/ece": 0.29325490196078435, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.13333333333333333, "calib/gap": 0.021783074648928258, "calib/mean_conf": 0.7370980392156863, "calib/mu_c": 0.7483739837398373, "calib/mu_w": 0.7265909090909091, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.274, "calib/std_conf": 0.19498066821238325, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1677.0, "completions/max_terminated_length": 1677.0, "completions/mean_length": 497.26953125, "completions/mean_terminated_length": 499.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.1824, "grad_norm": 0.010973098687827587, "learning_rate": 8.055555555555557e-07, "loss": 0.0116, "num_tokens": 38829941.0, "reward": 1.6121565103530884, "reward_std": 0.4386375844478607, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6518382430076599, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9296000003814697, "step": 171 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6243540051679586, "calib/avg_num_step_conf": 5.921875, "calib/ece": 0.13403162055335982, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.17391304347826086, "calib/gap": 0.06261915015790998, "calib/mean_conf": 0.7865217391304348, "calib/mu_c": 0.8065697674418606, "calib/mu_w": 0.7439506172839506, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12035573122529655, "calib/std_conf": 0.1586970057877189, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2656.0, "completions/max_terminated_length": 2656.0, "completions/mean_length": 497.6328125, "completions/mean_terminated_length": 497.6328125, "completions/min_length": 204.0, "completions/min_terminated_length": 204.0, "epoch": 0.18346666666666667, "grad_norm": 0.011852969415485859, "learning_rate": 7.777777777777779e-07, "loss": 0.0177, "num_tokens": 39060687.0, "reward": 1.9252095222473145, "reward_std": 0.41335391998291016, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7639777660369873, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9290477633476257, "step": 172 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5206237424547284, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.24763779527559063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.15748031496062992, "calib/gap": 0.05143234406438624, "calib/mean_conf": 0.7633070866141732, "calib/mu_c": 0.7859859154929577, "calib/mu_w": 0.7345535714285715, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22594488188976383, "calib/std_conf": 0.20169187975530575, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 534.05859375, "completions/mean_terminated_length": 534.05859375, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.18453333333333333, "grad_norm": 0.010401809588074684, "learning_rate": 7.5e-07, "loss": 0.002, "num_tokens": 39300566.0, "reward": 1.7318034172058105, "reward_std": 0.47870710492134094, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6910054683685303, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9237080812454224, "step": 173 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5059820538384845, "calib/avg_num_step_conf": 6.05078125, "calib/ece": 0.29574803149606305, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.21653543307086615, "calib/gap": 0.016704885343967857, "calib/mean_conf": 0.7960629921259843, "calib/mu_c": 0.8038235294117646, "calib/mu_w": 0.7871186440677967, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2781889763779528, "calib/std_conf": 0.17347834501594825, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2614.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 549.515625, "completions/mean_terminated_length": 549.515625, "completions/min_length": 249.0, "completions/min_terminated_length": 249.0, "epoch": 0.1856, "grad_norm": 0.011031479574739933, "learning_rate": 7.222222222222222e-07, "loss": 0.0039, "num_tokens": 39545474.0, "reward": 1.6889164447784424, "reward_std": 0.5492876768112183, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.656374990940094, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9274158477783203, "step": 174 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6095146871008942, "calib/avg_num_step_conf": 5.74609375, "calib/ece": 0.34766798418972344, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.16996047430830039, "calib/gap": 0.0715989782886336, "calib/mean_conf": 0.7741501976284585, "calib/mu_c": 0.8151851851851852, "calib/mu_w": 0.7435862068965516, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3474703557312254, "calib/std_conf": 0.1827365789396574, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 506.0078125, "completions/mean_terminated_length": 507.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.18666666666666668, "grad_norm": 0.011073246598243713, "learning_rate": 6.944444444444446e-07, "loss": -0.0056, "num_tokens": 39780836.0, "reward": 1.515343189239502, "reward_std": 0.41250574588775635, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.628931999206543, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9246280193328857, "step": 175 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6658396946564885, "calib/avg_num_step_conf": 5.921875, "calib/ece": 0.2586454183266932, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.14741035856573706, "calib/gap": 0.10823982188295167, "calib/mean_conf": 0.7529083665338645, "calib/mu_c": 0.8046564885496182, "calib/mu_w": 0.6964166666666666, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24482071713147407, "calib/std_conf": 0.21151866816381465, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 515.3125, "completions/mean_terminated_length": 517.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.18773333333333334, "grad_norm": 0.01043481845408678, "learning_rate": 6.666666666666667e-07, "loss": 0.0522, "num_tokens": 40016820.0, "reward": 1.6600654125213623, "reward_std": 0.4616318941116333, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6926000118255615, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9164116382598877, "step": 176 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6783281339530146, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.24646825396825398, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2222222222222222, "calib/gap": 0.11471764181575084, "calib/mean_conf": 0.7980555555555556, "calib/mu_c": 0.8494964028776978, "calib/mu_w": 0.734778761061947, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24646825396825398, "calib/std_conf": 0.18074962098164638, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2641.0, "completions/max_terminated_length": 2641.0, "completions/mean_length": 528.4921875, "completions/mean_terminated_length": 530.5647583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.1888, "grad_norm": 0.011687429621815681, "learning_rate": 6.388888888888889e-07, "loss": 0.0367, "num_tokens": 40255946.0, "reward": 1.7122498750686646, "reward_std": 0.40932321548461914, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7048050165176392, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9176322221755981, "step": 177 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5829978915085298, "calib/avg_num_step_conf": 5.95703125, "calib/ece": 0.28884920634920624, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.25, "calib/gap": 0.04688709986582351, "calib/mean_conf": 0.8301984126984128, "calib/mu_c": 0.8508510638297875, "calib/mu_w": 0.803963963963964, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27976190476190466, "calib/std_conf": 0.1538125286370861, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2468.0, "completions/max_terminated_length": 2468.0, "completions/mean_length": 478.14453125, "completions/mean_terminated_length": 481.9094543457031, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.18986666666666666, "grad_norm": 0.011508328840136528, "learning_rate": 6.111111111111112e-07, "loss": 0.0095, "num_tokens": 40484423.0, "reward": 1.7157710790634155, "reward_std": 0.5118218660354614, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6691105365753174, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9205360412597656, "step": 178 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6051773729626079, "calib/avg_num_step_conf": 5.796875, "calib/ece": 0.24692913385826748, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.25196850393700787, "calib/gap": 0.07034196228827116, "calib/mean_conf": 0.8307874015748031, "calib/mu_c": 0.8598657718120806, "calib/mu_w": 0.7895238095238094, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.245551181102362, "calib/std_conf": 0.14758212073609975, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 496.484375, "completions/mean_terminated_length": 500.3937072753906, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.19093333333333334, "grad_norm": 0.010500260628759861, "learning_rate": 5.833333333333334e-07, "loss": 0.0041, "num_tokens": 40717787.0, "reward": 1.777552843093872, "reward_std": 0.45863401889801025, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7046679258346558, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9289811849594116, "step": 179 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5870261437908497, "calib/avg_num_step_conf": 6.9765625, "calib/ece": 0.20739130434782602, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2490118577075099, "calib/gap": 0.07275947712418296, "calib/mean_conf": 0.8066007905138339, "calib/mu_c": 0.835359477124183, "calib/mu_w": 0.7626000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20462450592885367, "calib/std_conf": 0.1668929822049343, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 528.54296875, "completions/mean_terminated_length": 530.61572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.192, "grad_norm": 0.013314173556864262, "learning_rate": 5.555555555555555e-07, "loss": 0.0148, "num_tokens": 40956950.0, "reward": 1.8024948835372925, "reward_std": 0.40265530347824097, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7186331748962402, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9288461208343506, "step": 180 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.636540513399862, "calib/avg_num_step_conf": 5.78125, "calib/ece": 0.3806692913385827, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.2047244094488189, "calib/gap": 0.06348145358689505, "calib/mean_conf": 0.821220472440945, "calib/mu_c": 0.8564601769911504, "calib/mu_w": 0.7929787234042553, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.378503937007874, "calib/std_conf": 0.1561535895132963, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2767.0, "completions/max_terminated_length": 2767.0, "completions/mean_length": 492.48046875, "completions/mean_terminated_length": 494.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.19306666666666666, "grad_norm": 0.01176389493048191, "learning_rate": 5.277777777777779e-07, "loss": 0.0108, "num_tokens": 41189289.0, "reward": 1.5452067852020264, "reward_std": 0.444354772567749, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6135472655296326, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9344671964645386, "step": 181 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6405498505134538, "calib/avg_num_step_conf": 5.984375, "calib/ece": 0.22262745098039208, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.33725490196078434, "calib/gap": 0.0973293903548682, "calib/mean_conf": 0.8363529411764705, "calib/mu_c": 0.8737579617834396, "calib/mu_w": 0.7764285714285714, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22164705882352934, "calib/std_conf": 0.15809786290793232, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 515.68359375, "completions/mean_terminated_length": 515.68359375, "completions/min_length": 266.0, "completions/min_terminated_length": 266.0, "epoch": 0.19413333333333332, "grad_norm": 0.010333042591810226, "learning_rate": 5.000000000000001e-07, "loss": 0.0207, "num_tokens": 41427464.0, "reward": 1.8302533626556396, "reward_std": 0.40433841943740845, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.729537844657898, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9274133443832397, "step": 182 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6521644753162986, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.2826274509803923, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.3176470588235294, "calib/gap": 0.10953671545522214, "calib/mean_conf": 0.8277254901960784, "calib/mu_c": 0.8775539568345324, "calib/mu_w": 0.7680172413793103, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2826274509803923, "calib/std_conf": 0.17051572044499988, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1567.0, "completions/max_terminated_length": 1567.0, "completions/mean_length": 526.44921875, "completions/mean_terminated_length": 528.5137329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.1952, "grad_norm": 0.010599082335829735, "learning_rate": 4.7222222222222226e-07, "loss": -0.0175, "num_tokens": 41668915.0, "reward": 1.7200071811676025, "reward_std": 0.47910067439079285, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.694678544998169, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9353501796722412, "step": 183 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5091120060064159, "calib/avg_num_step_conf": 6.234375, "calib/ece": 0.22690476190476203, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": 0.04325848064978499, "calib/mean_conf": 0.8535714285714286, "calib/mu_c": 0.8691925465838509, "calib/mu_w": 0.8259340659340659, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2207936507936509, "calib/std_conf": 0.13639906603573024, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 523.75390625, "completions/mean_terminated_length": 527.8779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.19626666666666667, "grad_norm": 0.011380488984286785, "learning_rate": 4.444444444444445e-07, "loss": 0.0052, "num_tokens": 41908276.0, "reward": 1.8424732685089111, "reward_std": 0.541731595993042, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7132359743118286, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9144695997238159, "step": 184 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6891203703703704, "calib/avg_num_step_conf": 5.3828125, "calib/ece": 0.25826612903225804, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.2701612903225806, "calib/gap": 0.1236746031746031, "calib/mean_conf": 0.8159274193548386, "calib/mu_c": 0.8697857142857144, "calib/mu_w": 0.7461111111111113, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25483870967741934, "calib/std_conf": 0.17083271733685884, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 525.87890625, "completions/mean_terminated_length": 534.2261962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 241.0, "epoch": 0.19733333333333333, "grad_norm": 0.009946313686668873, "learning_rate": 4.1666666666666667e-07, "loss": 0.0001, "num_tokens": 42149821.0, "reward": 1.7037546634674072, "reward_std": 0.407586008310318, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6962480545043945, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.9078329205513, "step": 185 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4974448460675558, "calib/avg_num_step_conf": 5.62109375, "calib/ece": 0.30156862745098034, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.23529411764705882, "calib/gap": -0.0019986289417922576, "calib/mean_conf": 0.8072941176470587, "calib/mu_c": 0.8064084507042254, "calib/mu_w": 0.8084070796460177, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27599999999999997, "calib/std_conf": 0.17284735350145441, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 495.16015625, "completions/mean_terminated_length": 497.10198974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.1984, "grad_norm": 0.01056612841784954, "learning_rate": 3.8888888888888895e-07, "loss": -0.0087, "num_tokens": 42381622.0, "reward": 1.7294514179229736, "reward_std": 0.42536190152168274, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6570781469345093, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9404152631759644, "step": 186 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.549240974614109, "calib/avg_num_step_conf": 6.76171875, "calib/ece": 0.2944223107569721, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2749003984063745, "calib/gap": 0.032536037759918424, "calib/mean_conf": 0.8079681274900399, "calib/mu_c": 0.8231343283582089, "calib/mu_w": 0.7905982905982905, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28426294820717135, "calib/std_conf": 0.17380054219194718, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 550.0546875, "completions/mean_terminated_length": 554.3858032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.19946666666666665, "grad_norm": 0.009696499444544315, "learning_rate": 3.611111111111111e-07, "loss": 0.0437, "num_tokens": 42623980.0, "reward": 1.6667132377624512, "reward_std": 0.49259153008461, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6490710973739624, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9162194132804871, "step": 187 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5434809809809811, "calib/avg_num_step_conf": 5.87109375, "calib/ece": 0.263046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.31640625, "calib/gap": 0.04141641641641658, "calib/mean_conf": 0.823203125, "calib/mu_c": 0.8406756756756758, "calib/mu_w": 0.7992592592592592, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25406249999999997, "calib/std_conf": 0.17621867449346668, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1513.0, "completions/max_terminated_length": 1513.0, "completions/mean_length": 506.57421875, "completions/mean_terminated_length": 508.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.20053333333333334, "grad_norm": 0.011942905373871326, "learning_rate": 3.3333333333333335e-07, "loss": 0.0329, "num_tokens": 42857735.0, "reward": 1.7743031978607178, "reward_std": 0.4323691129684448, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6851898431777954, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9432730674743652, "step": 188 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5701013513513514, "calib/avg_num_step_conf": 5.5234375, "calib/ece": 0.2772549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2823529411764706, "calib/gap": 0.048393393393393325, "calib/mean_conf": 0.816156862745098, "calib/mu_c": 0.8372222222222222, "calib/mu_w": 0.7888288288288289, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26435294117647057, "calib/std_conf": 0.18745806041268814, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 493.03125, "completions/mean_terminated_length": 493.03125, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 0.2016, "grad_norm": 0.012970229610800743, "learning_rate": 3.055555555555556e-07, "loss": 0.0477, "num_tokens": 43091719.0, "reward": 1.7471036911010742, "reward_std": 0.34374135732650757, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6769554615020752, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9442719221115112, "step": 189 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6959006211180124, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.27568627450980404, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.23137254901960785, "calib/gap": 0.09738198757763972, "calib/mean_conf": 0.8097254901960784, "calib/mu_c": 0.8536428571428571, "calib/mu_w": 0.7562608695652174, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26819607843137266, "calib/std_conf": 0.1693873711559178, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 519.3828125, "completions/mean_terminated_length": 521.4196166992188, "completions/min_length": 0.0, "completions/min_terminated_length": 264.0, "epoch": 0.20266666666666666, "grad_norm": 0.010747202672064304, "learning_rate": 2.7777777777777776e-07, "loss": 0.0033, "num_tokens": 43330289.0, "reward": 1.7282007932662964, "reward_std": 0.4465250074863434, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7012163996696472, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9381492733955383, "step": 190 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5797524142847884, "calib/avg_num_step_conf": 6.37890625, "calib/ece": 0.38432, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.276, "calib/gap": 0.05565493551105072, "calib/mean_conf": 0.8132, "calib/mu_c": 0.8441441441441442, "calib/mu_w": 0.7884892086330935, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37676, "calib/std_conf": 0.17352625161629, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 523.70703125, "completions/mean_terminated_length": 523.70703125, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.20373333333333332, "grad_norm": 0.011537563987076283, "learning_rate": 2.5000000000000004e-07, "loss": 0.0868, "num_tokens": 43568526.0, "reward": 1.5195637941360474, "reward_std": 0.4578823149204254, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5997992157936096, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9237686395645142, "step": 191 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6606349206349206, "calib/avg_num_step_conf": 5.88671875, "calib/ece": 0.22733333333333322, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.24313725490196078, "calib/gap": 0.10824761904761904, "calib/mean_conf": 0.8106274509803921, "calib/mu_c": 0.8552, "calib/mu_w": 0.7469523809523809, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2248627450980391, "calib/std_conf": 0.18391280551053915, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 497.00390625, "completions/mean_terminated_length": 498.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.2048, "grad_norm": 0.01216538529843092, "learning_rate": 2.2222222222222224e-07, "loss": 0.0048, "num_tokens": 43800735.0, "reward": 1.7913602590560913, "reward_std": 0.41253167390823364, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7241019606590271, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9335265159606934, "step": 192 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6474137931034483, "calib/avg_num_step_conf": 5.59375, "calib/ece": 0.242806324110672, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.25296442687747034, "calib/gap": 0.08819476372924662, "calib/mean_conf": 0.7889723320158103, "calib/mu_c": 0.8266206896551725, "calib/mu_w": 0.7384259259259259, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22932806324110677, "calib/std_conf": 0.19632526055027869, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 498.4375, "completions/mean_terminated_length": 500.3921813964844, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.20586666666666667, "grad_norm": 0.010590976104140282, "learning_rate": 1.9444444444444447e-07, "loss": 0.0293, "num_tokens": 44034047.0, "reward": 1.7486381530761719, "reward_std": 0.504930853843689, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7042644023895264, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9231005907058716, "step": 193 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5860252148278242, "calib/avg_num_step_conf": 5.29296875, "calib/ece": 0.24011718750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.328125, "calib/gap": 0.06934516715800043, "calib/mean_conf": 0.7983984375, "calib/mu_c": 0.8273825503355705, "calib/mu_w": 0.7580373831775701, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22824218750000005, "calib/std_conf": 0.20291858126489695, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1110.0, "completions/max_terminated_length": 1110.0, "completions/mean_length": 455.2109375, "completions/mean_terminated_length": 456.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.20693333333333333, "grad_norm": 0.012992671690881252, "learning_rate": 1.6666666666666668e-07, "loss": 0.0068, "num_tokens": 44256525.0, "reward": 1.7848504781723022, "reward_std": 0.37915557622909546, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7024776935577393, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9447369575500488, "step": 194 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6206853890882097, "calib/avg_num_step_conf": 5.703125, "calib/ece": 0.30819607843137264, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2980392156862745, "calib/gap": 0.04618644067796607, "calib/mean_conf": 0.8186274509803921, "calib/mu_c": 0.8399999999999999, "calib/mu_w": 0.7938135593220338, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2947843137254903, "calib/std_conf": 0.17406579337398334, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1543.0, "completions/max_terminated_length": 1543.0, "completions/mean_length": 483.6640625, "completions/mean_terminated_length": 485.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.208, "grad_norm": 0.011555323377251625, "learning_rate": 1.3888888888888888e-07, "loss": -0.0162, "num_tokens": 44486327.0, "reward": 1.7013813257217407, "reward_std": 0.37319985032081604, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6622863411903381, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.94011390209198, "step": 195 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6548038229376258, "calib/avg_num_step_conf": 5.53125, "calib/ece": 0.2738188976377952, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.31496062992125984, "calib/gap": 0.09700704225352119, "calib/mean_conf": 0.8167322834645668, "calib/mu_c": 0.8595070422535213, "calib/mu_w": 0.7625000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26574803149606296, "calib/std_conf": 0.17315947539308332, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 981.0, "completions/max_terminated_length": 981.0, "completions/mean_length": 454.4375, "completions/mean_terminated_length": 456.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.20906666666666668, "grad_norm": 0.01108147669583559, "learning_rate": 1.1111111111111112e-07, "loss": 0.0098, "num_tokens": 44705207.0, "reward": 1.7390491962432861, "reward_std": 0.34051012992858887, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6994253993034363, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.944271445274353, "step": 196 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6603300539511265, "calib/avg_num_step_conf": 5.6953125, "calib/ece": 0.3408730158730157, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.19444444444444445, "calib/gap": 0.09362361155188825, "calib/mean_conf": 0.7966666666666667, "calib/mu_c": 0.8475652173913043, "calib/mu_w": 0.753941605839416, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.34059523809523795, "calib/std_conf": 0.16966119366231228, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2876.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 518.34375, "completions/mean_terminated_length": 520.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.21013333333333334, "grad_norm": 0.010996755212545395, "learning_rate": 8.333333333333334e-08, "loss": 0.0487, "num_tokens": 44942959.0, "reward": 1.5602569580078125, "reward_std": 0.5935218334197998, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6435444951057434, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9334204196929932, "step": 197 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6740728692257644, "calib/avg_num_step_conf": 5.43359375, "calib/ece": 0.19486055776892441, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.18326693227091634, "calib/gap": 0.1336389069616135, "calib/mean_conf": 0.7723904382470119, "calib/mu_c": 0.8288275862068966, "calib/mu_w": 0.695188679245283, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19478087649402404, "calib/std_conf": 0.19325648932324366, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2876.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 456.890625, "completions/mean_terminated_length": 462.3083190917969, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.2112, "grad_norm": 0.0115453926846385, "learning_rate": 5.555555555555556e-08, "loss": 0.0046, "num_tokens": 45165307.0, "reward": 1.7558231353759766, "reward_std": 0.3816918134689331, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7314152121543884, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9325023293495178, "step": 198 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6003032649374114, "calib/avg_num_step_conf": 5.578125, "calib/ece": 0.3161847389558233, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.22088353413654618, "calib/gap": 0.07058072009291538, "calib/mean_conf": 0.8013253012048194, "calib/mu_c": 0.8361904761904764, "calib/mu_w": 0.765609756097561, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3057429718875502, "calib/std_conf": 0.18988542995302038, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2610.0, "completions/max_terminated_length": 2610.0, "completions/mean_length": 557.78515625, "completions/mean_terminated_length": 559.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.21226666666666666, "grad_norm": 0.010695649310946465, "learning_rate": 2.777777777777778e-08, "loss": 0.0343, "num_tokens": 45412300.0, "reward": 1.615269422531128, "reward_std": 0.5218293070793152, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6439590454101562, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.918681263923645, "step": 199 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6688959937361346, "calib/avg_num_step_conf": 5.4140625, "calib/ece": 0.2097647058823529, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.26666666666666666, "calib/gap": 0.10869307059898237, "calib/mean_conf": 0.8246666666666668, "calib/mu_c": 0.8660126582278482, "calib/mu_w": 0.7573195876288659, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20741176470588232, "calib/std_conf": 0.1617835235638414, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 504.14453125, "completions/mean_terminated_length": 504.14453125, "completions/min_length": 218.0, "completions/min_terminated_length": 218.0, "epoch": 0.21333333333333335, "grad_norm": 0.010254484601318836, "learning_rate": 0.0, "loss": 0.0446, "num_tokens": 45649409.0, "reward": 1.8421803712844849, "reward_std": 0.38195812702178955, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7412852048873901, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9399361610412598, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.02880778172169812, "train_runtime": 12568.5189, "train_samples_per_second": 4.074, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 45649409, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }