{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.06397800147533417, "learning_rate": 2.5000000000000004e-07, "loss": 0.0332, "num_tokens": 264685.0, "reward": 0.03929531201720238, "reward_std": 0.08434611558914185, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step_strict": 0.0390625, "step": 1 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.0062493993900716305, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step_strict": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -6.791685690034553e-09, "aux_brier/mean_group_std": 0.026330289244419776, "aux_brier/mean_r": 0.3936683974066279, "aux_brier/n_active_tok": 20.285714285714285, "aux_brier/n_groups": 4.357142857142857, "aux_brier/n_step_records": 5.071428571428571, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.6122222222222221, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": 0.0066666666666664876, "calib/mean_conf": 0.9455555555555555, "calib/mu_c": 0.9499999999999998, "calib/mu_w": 0.9433333333333334, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6122222222222221, "calib/std_conf": 0.04099081499043713, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2987.0, "completions/max_terminated_length": 2987.0, "completions/mean_length": 608.94140625, "completions/mean_terminated_length": 686.7356567382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.004697302356362343, "learning_rate": 7.5e-07, "loss": 0.0239, "num_tokens": 794612.0, "reward": 0.05449219048023224, "reward_std": 0.11826249957084656, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.02265624888241291, "rewards/format_reward_step_strict": 0.05078125, "step": 3 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -1.3787337449500114e-07, "aux_brier/mean_group_std": 0.030162715461268252, "aux_brier/mean_r": 0.45508114387727977, "aux_brier/n_active_tok": 25.666666666666668, "aux_brier/n_groups": 5.583333333333333, "aux_brier/n_step_records": 6.416666666666667, "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.8125, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.6748750000000001, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.07683333333333309, "calib/mean_conf": 0.924875, "calib/mu_c": 0.9824999999999999, "calib/mu_w": 0.9056666666666668, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6748750000000001, "calib/std_conf": 0.15038778000555764, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2920.0, "completions/max_terminated_length": 2920.0, "completions/mean_length": 697.66796875, "completions/mean_terminated_length": 783.3464965820312, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.004266666666666667, "grad_norm": 0.004250648897141218, "learning_rate": 1.0000000000000002e-06, "loss": 0.0249, "num_tokens": 1079383.0, "reward": 0.03757001459598541, "reward_std": 0.08593931794166565, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.009655077941715717, "rewards/format_reward_step_strict": 0.0390625, "step": 4 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -8.765388948006603e-08, "aux_brier/mean_group_std": 0.026502882796535628, "aux_brier/mean_r": 0.4053858897904748, "aux_brier/n_active_tok": 24.4, "aux_brier/n_groups": 5.3, "aux_brier/n_step_records": 6.1, "calib/answer_extract_rate": 0.0625, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.9410000000000001, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7, "calib/mean_conf": 0.9410000000000001, "calib/mu_c": NaN, "calib/mu_w": 0.9410000000000001, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.9410000000000001, "calib/std_conf": 0.05717516943569121, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 681.4296875, "completions/mean_terminated_length": 778.7767944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.005333333333333333, "grad_norm": 0.05814728885889053, "learning_rate": 1.25e-06, "loss": 0.0012, "num_tokens": 1360517.0, "reward": 0.012670507654547691, "reward_std": 0.035837605595588684, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0038070310838520527, "rewards/format_reward_step_strict": 0.0234375, "step": 5 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": 2.170477235849563e-09, "aux_brier/mean_group_std": 0.012296618979620234, "aux_brier/mean_r": 0.45331120539135794, "aux_brier/n_active_tok": 20.941176470588236, "aux_brier/n_groups": 4.9411764705882355, "aux_brier/n_step_records": 5.235294117647059, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.7142857142857143, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.9026666666666666, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": 0.02214285714285713, "calib/mean_conf": 0.9693333333333334, "calib/mu_c": 0.99, "calib/mu_w": 0.9678571428571429, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.9026666666666666, "calib/std_conf": 0.03213858878185053, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 587.89453125, "completions/mean_terminated_length": 624.4855346679688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0064, "grad_norm": 0.04504093527793884, "learning_rate": 1.5e-06, "loss": 0.0441, "num_tokens": 1616970.0, "reward": 0.029170311987400055, "reward_std": 0.06491636484861374, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.007306249812245369, "rewards/format_reward_step_strict": 0.046875, "step": 6 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": 1.3944936871901348e-08, "aux_brier/mean_group_std": 0.011126655246623984, "aux_brier/mean_r": 0.396032043567839, "aux_brier/n_active_tok": 20.0, "aux_brier/n_groups": 4.375, "aux_brier/n_step_records": 5.0, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.6282051282051282, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.7468750000000001, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.03974358974358971, "calib/mean_conf": 0.934375, "calib/mu_c": 0.9666666666666667, "calib/mu_w": 0.926923076923077, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.7468750000000001, "calib/std_conf": 0.08313909654909656, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2920.0, "completions/max_terminated_length": 2920.0, "completions/mean_length": 679.609375, "completions/mean_terminated_length": 753.16015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.0054075587540864944, "learning_rate": 1.75e-06, "loss": 0.0273, "num_tokens": 1898374.0, "reward": 0.04263964667916298, "reward_std": 0.09595522284507751, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.014308594167232513, "rewards/format_reward_step_strict": 0.0546875, "step": 7 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": 1.041622826684662e-07, "aux_brier/mean_group_std": 0.03613882228162013, "aux_brier/mean_r": 0.53472203147931, "aux_brier/n_active_tok": 25.41176470588235, "aux_brier/n_groups": 5.117647058823529, "aux_brier/n_step_records": 6.352941176470588, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.73125, "calib/avg_num_step_conf": 0.44921875, "calib/ece": 0.47611111111111104, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.7222222222222222, "calib/gap": 0.048499999999999766, "calib/mean_conf": 0.9205555555555556, "calib/mu_c": 0.9475, "calib/mu_w": 0.8990000000000002, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.47611111111111104, "calib/std_conf": 0.05758782878952249, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 2998.0, "completions/max_terminated_length": 2998.0, "completions/mean_length": 596.51953125, "completions/mean_terminated_length": 684.7937622070312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.030084656551480293, "learning_rate": 2.0000000000000003e-06, "loss": 0.0551, "num_tokens": 2157595.0, "reward": 0.07156562805175781, "reward_std": 0.16391682624816895, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.036262497305870056, "rewards/format_reward_step_strict": 0.0625, "step": 8 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 2.292486337515024e-09, "aux_brier/mean_group_std": 0.01077561165240097, "aux_brier/mean_r": 0.381424414499452, "aux_brier/n_active_tok": 28.0, "aux_brier/n_groups": 6.846153846153846, "aux_brier/n_step_records": 7.0, "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.5416666666666666, "calib/avg_num_step_conf": 0.36328125, "calib/ece": 0.7832142857142855, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.7857142857142857, "calib/gap": 0.026249999999999996, "calib/mean_conf": 0.9074999999999999, "calib/mu_c": 0.9299999999999999, "calib/mu_w": 0.9037499999999999, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7739285714285713, "calib/std_conf": 0.14668661931380886, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 596.78125, "completions/mean_terminated_length": 644.6244506835938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0096, "grad_norm": 0.0032177777029573917, "learning_rate": 2.25e-06, "loss": 0.0054, "num_tokens": 2417907.0, "reward": 0.02889160066843033, "reward_std": 0.06506296992301941, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.014003905467689037, "rewards/format_reward_step_strict": 0.03515625, "step": 9 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": -2.218205237792357e-08, "aux_brier/mean_group_std": 0.04597241399111326, "aux_brier/mean_r": 0.45356199130795183, "aux_brier/n_active_tok": 31.764705882352942, "aux_brier/n_groups": 5.529411764705882, "aux_brier/n_step_records": 7.9411764705882355, "calib/answer_extract_rate": 0.109375, "calib/avg_num_step_conf": 0.56640625, "calib/ece": 0.9502173913043477, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.8695652173913043, "calib/mean_conf": 0.9502173913043477, "calib/mu_c": NaN, "calib/mu_w": 0.9502173913043477, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.11328125, "calib/pce": 0.9502173913043477, "calib/std_conf": 0.03946595287932331, "calib/step_conf_rate": 0.11328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 606.12890625, "completions/mean_terminated_length": 671.727294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.008876045234501362, "learning_rate": 2.5e-06, "loss": 0.0425, "num_tokens": 2679876.0, "reward": 0.0369962640106678, "reward_std": 0.08365122228860855, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0073600588366389275, "rewards/format_reward_step_strict": 0.0703125, "step": 10 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 2.922880463978273e-09, "aux_brier/mean_group_std": 0.0769800248991916, "aux_brier/mean_r": 0.46133195413483064, "aux_brier/n_active_tok": 30.526315789473685, "aux_brier/n_groups": 4.631578947368421, "aux_brier/n_step_records": 7.631578947368421, "calib/answer_extract_rate": 0.171875, "calib/auroc": 0.7111111111111111, "calib/avg_num_step_conf": 0.58984375, "calib/ece": 0.5960714285714286, "calib/final_conf_rate": 0.109375, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.8928571428571429, "calib/gap": 0.02922222222222215, "calib/mean_conf": 0.9532142857142858, "calib/mu_c": 0.9719999999999999, "calib/mu_w": 0.9427777777777777, "calib/nonempty_final_conf_rate": 0.109375, "calib/nonempty_reasoning_rate": 0.1953125, "calib/nonempty_step_conf_rate": 0.140625, "calib/pce": 0.5960714285714286, "calib/std_conf": 0.04268268061509018, "calib/step_conf_rate": 0.140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 662.08984375, "completions/mean_terminated_length": 736.9347534179688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.011733333333333333, "grad_norm": 0.009959953837096691, "learning_rate": 2.7500000000000004e-06, "loss": 0.0603, "num_tokens": 2953851.0, "reward": 0.09444931149482727, "reward_std": 0.17308209836483002, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.041859764605760574, "rewards/format_reward_step_strict": 0.08984375, "step": 11 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 8.366272684412479e-08, "aux_brier/mean_group_std": 0.07124658917862621, "aux_brier/mean_r": 0.5592102827551356, "aux_brier/n_active_tok": 33.28, "aux_brier/n_groups": 6.08, "aux_brier/n_step_records": 8.32, "calib/answer_extract_rate": 0.22265625, "calib/auroc": 0.6133004926108375, "calib/avg_num_step_conf": 0.83984375, "calib/ece": 0.5939511627906976, "calib/final_conf_rate": 0.16796875, "calib/format_rate": 0.13671875, "calib/frac_conf_gt_0.9": 0.7906976744186046, "calib/gap": 0.04199852216748767, "calib/mean_conf": 0.9195325581395349, "calib/mu_c": 0.9478571428571428, "calib/mu_w": 0.9058586206896552, "calib/nonempty_final_conf_rate": 0.16796875, "calib/nonempty_reasoning_rate": 0.24609375, "calib/nonempty_step_conf_rate": 0.171875, "calib/pce": 0.5939511627906976, "calib/std_conf": 0.15529226866592363, "calib/step_conf_rate": 0.171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 537.01171875, "completions/mean_terminated_length": 582.5211791992188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.07206593453884125, "learning_rate": 3e-06, "loss": 0.097, "num_tokens": 3195502.0, "reward": 0.13977420330047607, "reward_std": 0.26127028465270996, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.06690938025712967, "rewards/format_reward_step_strict": 0.12890625, "step": 12 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.631975352574351e-08, "aux_brier/mean_group_std": 0.08108022968982807, "aux_brier/mean_r": 0.5177187568628713, "aux_brier/n_active_tok": 35.82608695652174, "aux_brier/n_groups": 5.3478260869565215, "aux_brier/n_step_records": 8.956521739130435, "calib/answer_extract_rate": 0.17578125, "calib/auroc": 0.4206730769230769, "calib/avg_num_step_conf": 0.82421875, "calib/ece": 0.6829411764705884, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.7647058823529411, "calib/gap": 0.03423076923076929, "calib/mean_conf": 0.8988235294117648, "calib/mu_c": 0.925, "calib/mu_w": 0.8907692307692308, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.2265625, "calib/nonempty_step_conf_rate": 0.1796875, "calib/pce": 0.6732352941176472, "calib/std_conf": 0.17333688222788718, "calib/step_conf_rate": 0.1796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 637.61328125, "completions/mean_terminated_length": 671.7242431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.013866666666666666, "grad_norm": 0.3250519931316376, "learning_rate": 3.2500000000000002e-06, "loss": 0.0736, "num_tokens": 3463323.0, "reward": 0.0953918844461441, "reward_std": 0.21836352348327637, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.0456300750374794, "rewards/format_reward_step_strict": 0.10546875, "step": 13 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 8.25449501344148e-09, "aux_brier/mean_group_std": 0.11988383840590024, "aux_brier/mean_r": 0.4409585831012259, "aux_brier/n_active_tok": 53.48148148148148, "aux_brier/n_groups": 8.222222222222221, "aux_brier/n_step_records": 13.37037037037037, "calib/answer_extract_rate": 0.28515625, "calib/auroc": 0.4157509157509158, "calib/avg_num_step_conf": 1.42578125, "calib/ece": 0.6659636363636363, "calib/final_conf_rate": 0.21484375, "calib/format_rate": 0.1796875, "calib/frac_conf_gt_0.9": 0.7090909090909091, "calib/gap": 0.02294139194139211, "calib/mean_conf": 0.9023272727272728, "calib/mu_c": 0.9198461538461539, "calib/mu_w": 0.8969047619047618, "calib/nonempty_final_conf_rate": 0.21484375, "calib/nonempty_reasoning_rate": 0.33203125, "calib/nonempty_step_conf_rate": 0.2421875, "calib/pce": 0.6659636363636363, "calib/std_conf": 0.15360907342346847, "calib/step_conf_rate": 0.2421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 546.046875, "completions/mean_terminated_length": 589.82275390625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.014933333333333333, "grad_norm": 0.21622756123542786, "learning_rate": 3.5e-06, "loss": 0.033, "num_tokens": 3708511.0, "reward": 0.15140286087989807, "reward_std": 0.26687172055244446, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.06654896587133408, "rewards/format_reward_step_strict": 0.16796875, "step": 14 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.215258963336349e-08, "aux_brier/mean_group_std": 0.09739835425774569, "aux_brier/mean_r": 0.5195538920234438, "aux_brier/n_active_tok": 48.275862068965516, "aux_brier/n_groups": 6.482758620689655, "aux_brier/n_step_records": 12.068965517241379, "calib/answer_extract_rate": 0.3046875, "calib/auroc": 0.5235404896421845, "calib/avg_num_step_conf": 1.4140625, "calib/ece": 0.7321764705882352, "calib/final_conf_rate": 0.265625, "calib/format_rate": 0.21484375, "calib/frac_conf_gt_0.9": 0.7205882352941176, "calib/gap": 0.08186064030131823, "calib/mean_conf": 0.864529411764706, "calib/mu_c": 0.9355555555555556, "calib/mu_w": 0.8536949152542374, "calib/nonempty_final_conf_rate": 0.265625, "calib/nonempty_reasoning_rate": 0.35546875, "calib/nonempty_step_conf_rate": 0.2734375, "calib/pce": 0.7321764705882352, "calib/std_conf": 0.21858148773857064, "calib/step_conf_rate": 0.2734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 517.87890625, "completions/mean_terminated_length": 559.3966064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.016, "grad_norm": 0.1340445727109909, "learning_rate": 3.7500000000000005e-06, "loss": 0.083, "num_tokens": 3948968.0, "reward": 0.15124216675758362, "reward_std": 0.259046733379364, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.06590613722801208, "rewards/format_reward_step_strict": 0.19921875, "step": 15 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5859946040346207e-07, "aux_brier/mean_group_std": 0.11149276619869844, "aux_brier/mean_r": 0.466011658309321, "aux_brier/n_active_tok": 53.67741935483871, "aux_brier/n_groups": 7.096774193548387, "aux_brier/n_step_records": 13.419354838709678, "calib/answer_extract_rate": 0.31640625, "calib/auroc": 0.556497175141243, "calib/avg_num_step_conf": 1.69140625, "calib/ece": 0.8198240818498452, "calib/final_conf_rate": 0.265625, "calib/format_rate": 0.2421875, "calib/frac_conf_gt_0.9": 0.8676470588235294, "calib/gap": 0.016699928263455344, "calib/mean_conf": 0.9521770230263156, "calib/mu_c": 0.9666666666666666, "calib/mu_w": 0.9499667384032112, "calib/nonempty_final_conf_rate": 0.265625, "calib/nonempty_reasoning_rate": 0.38671875, "calib/nonempty_step_conf_rate": 0.328125, "calib/pce": 0.8198240818498452, "calib/std_conf": 0.05885991926718554, "calib/step_conf_rate": 0.328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2848.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 604.10546875, "completions/mean_terminated_length": 652.5358276367188, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.017066666666666667, "grad_norm": 0.07980859279632568, "learning_rate": 4.000000000000001e-06, "loss": 0.0935, "num_tokens": 4212467.0, "reward": 0.1710590124130249, "reward_std": 0.261719286441803, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.05142351984977722, "rewards/format_reward_step_strict": 0.23046875, "step": 16 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.8866845988285696e-08, "aux_brier/mean_group_std": 0.18979239793748265, "aux_brier/mean_r": 0.5208160422229862, "aux_brier/n_active_tok": 67.875, "aux_brier/n_groups": 7.15625, "aux_brier/n_step_records": 16.96875, "calib/answer_extract_rate": 0.5, "calib/auroc": 0.6292265837543722, "calib/avg_num_step_conf": 2.16796875, "calib/ece": 0.5964815789473684, "calib/final_conf_rate": 0.4453125, "calib/format_rate": 0.3828125, "calib/frac_conf_gt_0.9": 0.7192982456140351, "calib/gap": 0.08704209094442283, "calib/mean_conf": 0.8363078947368421, "calib/mu_c": 0.8996806451612903, "calib/mu_w": 0.8126385542168675, "calib/nonempty_final_conf_rate": 0.4453125, "calib/nonempty_reasoning_rate": 0.58203125, "calib/nonempty_step_conf_rate": 0.4765625, "calib/pce": 0.5804298245614035, "calib/std_conf": 0.27625250658376566, "calib/step_conf_rate": 0.4765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 512.2578125, "completions/mean_terminated_length": 528.7822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.018133333333333335, "grad_norm": 0.21119742095470428, "learning_rate": 4.25e-06, "loss": 0.1329, "num_tokens": 4447133.0, "reward": 0.3442577123641968, "reward_std": 0.4013967514038086, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.1660933494567871, "rewards/format_reward_step_strict": 0.36328125, "step": 17 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.09537276067187e-08, "aux_brier/mean_group_std": 0.1396391851262472, "aux_brier/mean_r": 0.49583937271553763, "aux_brier/n_active_tok": 57.935483870967744, "aux_brier/n_groups": 6.774193548387097, "aux_brier/n_step_records": 14.483870967741936, "calib/answer_extract_rate": 0.4375, "calib/auroc": 0.5814636752136751, "calib/avg_num_step_conf": 1.79296875, "calib/ece": 0.6377040816326531, "calib/final_conf_rate": 0.3828125, "calib/format_rate": 0.3203125, "calib/frac_conf_gt_0.9": 0.6122448979591837, "calib/gap": -0.0008066239316240065, "calib/mean_conf": 0.8644387755102041, "calib/mu_c": 0.8638461538461537, "calib/mu_w": 0.8646527777777777, "calib/nonempty_final_conf_rate": 0.3828125, "calib/nonempty_reasoning_rate": 0.48046875, "calib/nonempty_step_conf_rate": 0.3828125, "calib/pce": 0.6184183673469388, "calib/std_conf": 0.22205198262176296, "calib/step_conf_rate": 0.3828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 570.4453125, "completions/mean_terminated_length": 591.2307739257812, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.0192, "grad_norm": 0.0675438642501831, "learning_rate": 4.5e-06, "loss": 0.1028, "num_tokens": 4703887.0, "reward": 0.28900033235549927, "reward_std": 0.37102437019348145, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.11693897098302841, "rewards/format_reward_step_strict": 0.30859375, "step": 18 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.906780661286582e-09, "aux_brier/mean_group_std": 0.2010225263591276, "aux_brier/mean_r": 0.48971788551743184, "aux_brier/n_active_tok": 102.0, "aux_brier/n_groups": 7.53125, "aux_brier/n_step_records": 25.5, "calib/answer_extract_rate": 0.73828125, "calib/auroc": 0.5835373317013464, "calib/avg_num_step_conf": 3.328125, "calib/ece": 0.6714461077844311, "calib/final_conf_rate": 0.65234375, "calib/format_rate": 0.58984375, "calib/frac_conf_gt_0.9": 0.7245508982035929, "calib/gap": 0.04353590371277005, "calib/mean_conf": 0.8882125748502995, "calib/mu_c": 0.9218421052631577, "calib/mu_w": 0.8783062015503876, "calib/nonempty_final_conf_rate": 0.65234375, "calib/nonempty_reasoning_rate": 0.80859375, "calib/nonempty_step_conf_rate": 0.71875, "calib/pce": 0.6660568862275449, "calib/std_conf": 0.1946123692703289, "calib/step_conf_rate": 0.71875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 367.4375, "completions/mean_terminated_length": 371.79449462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.020266666666666665, "grad_norm": 0.4549923539161682, "learning_rate": 4.75e-06, "loss": 0.0945, "num_tokens": 4902711.0, "reward": 0.4799804091453552, "reward_std": 0.5010626316070557, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.2089841514825821, "rewards/format_reward_step_strict": 0.55078125, "step": 19 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7392651423253636e-08, "aux_brier/mean_group_std": 0.22839283814930253, "aux_brier/mean_r": 0.5214004263634809, "aux_brier/n_active_tok": 118.625, "aux_brier/n_groups": 7.5625, "aux_brier/n_step_records": 29.65625, "calib/answer_extract_rate": 0.82421875, "calib/auroc": 0.4738038277511961, "calib/avg_num_step_conf": 3.7734375, "calib/ece": 0.6260391304347825, "calib/final_conf_rate": 0.80859375, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.6908212560386473, "calib/gap": 0.0120172846889951, "calib/mean_conf": 0.8868120772946861, "calib/mu_c": 0.8956363636363636, "calib/mu_w": 0.8836190789473685, "calib/nonempty_final_conf_rate": 0.80859375, "calib/nonempty_reasoning_rate": 0.8984375, "calib/nonempty_step_conf_rate": 0.8359375, "calib/pce": 0.6235753623188405, "calib/std_conf": 0.17488052981739108, "calib/step_conf_rate": 0.8359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1657.0, "completions/max_terminated_length": 1657.0, "completions/mean_length": 291.59765625, "completions/mean_terminated_length": 295.05535888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.021333333333333333, "grad_norm": 0.47028541564941406, "learning_rate": 5e-06, "loss": 0.1147, "num_tokens": 5082232.0, "reward": 0.6447250843048096, "reward_std": 0.5082762837409973, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.28983786702156067, "rewards/format_reward_step_strict": 0.69921875, "step": 20 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.1170123432057153e-09, "aux_brier/mean_group_std": 0.20927929176372112, "aux_brier/mean_r": 0.5027852482760771, "aux_brier/n_active_tok": 131.625, "aux_brier/n_groups": 8.09375, "aux_brier/n_step_records": 32.90625, "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.42809594251901945, "calib/avg_num_step_conf": 4.1640625, "calib/ece": 0.6629777777777778, "calib/final_conf_rate": 0.87890625, "calib/format_rate": 0.80078125, "calib/frac_conf_gt_0.9": 0.6888888888888889, "calib/gap": -0.0349196956889265, "calib/mean_conf": 0.8948, "calib/mu_c": 0.8685714285714285, "calib/mu_w": 0.903491124260355, "calib/nonempty_final_conf_rate": 0.87890625, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.890625, "calib/pce": 0.6544444444444444, "calib/std_conf": 0.16456833771347928, "calib/step_conf_rate": 0.890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 302.80859375, "completions/mean_terminated_length": 303.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.0224, "grad_norm": 0.3184995949268341, "learning_rate": 4.9722222222222224e-06, "loss": 0.0606, "num_tokens": 5262711.0, "reward": 0.6963440179824829, "reward_std": 0.48389190435409546, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.2931886613368988, "rewards/format_reward_step_strict": 0.77734375, "step": 21 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.4656056541995088e-07, "aux_brier/mean_group_std": 0.19204337763798254, "aux_brier/mean_r": 0.5070142204806846, "aux_brier/n_active_tok": 124.25, "aux_brier/n_groups": 7.3125, "aux_brier/n_step_records": 31.0625, "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.4335634689922481, "calib/avg_num_step_conf": 3.9375, "calib/ece": 0.7069954545454545, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.76953125, "calib/frac_conf_gt_0.9": 0.7318181818181818, "calib/gap": -0.025662790697674298, "calib/mean_conf": 0.9138136363636364, "calib/mu_c": 0.8937500000000002, "calib/mu_w": 0.9194127906976745, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.7013136363636363, "calib/std_conf": 0.12356855585409972, "calib/step_conf_rate": 0.87890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2643.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 279.92578125, "completions/mean_terminated_length": 284.3690490722656, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.023466666666666667, "grad_norm": 0.6099845170974731, "learning_rate": 4.944444444444445e-06, "loss": 0.0658, "num_tokens": 5436188.0, "reward": 0.6216346621513367, "reward_std": 0.48374059796333313, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.24435116350650787, "rewards/format_reward_step_strict": 0.74609375, "step": 22 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.123380926464692e-08, "aux_brier/mean_group_std": 0.20206573795531604, "aux_brier/mean_r": 0.4957934768555055, "aux_brier/n_active_tok": 135.5, "aux_brier/n_groups": 8.40625, "aux_brier/n_step_records": 33.875, "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.48203124999999997, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.6293589743589743, "calib/final_conf_rate": 0.9140625, "calib/format_rate": 0.84765625, "calib/frac_conf_gt_0.9": 0.7478632478632479, "calib/gap": 0.0027959558823530406, "calib/mean_conf": 0.895, "calib/mu_c": 0.89703125, "calib/mu_w": 0.8942352941176469, "calib/nonempty_final_conf_rate": 0.9140625, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.6254273504273503, "calib/std_conf": 0.17490962257227607, "calib/step_conf_rate": 0.921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 304.3671875, "completions/mean_terminated_length": 304.3671875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.16705112159252167, "learning_rate": 4.9166666666666665e-06, "loss": 0.0472, "num_tokens": 5618042.0, "reward": 0.7509870529174805, "reward_std": 0.5155820846557617, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.339885950088501, "rewards/format_reward_step_strict": 0.81640625, "step": 23 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.292471454669933e-09, "aux_brier/mean_group_std": 0.19789524778785733, "aux_brier/mean_r": 0.5432200850558442, "aux_brier/n_active_tok": 141.25, "aux_brier/n_groups": 8.9375, "aux_brier/n_step_records": 35.3125, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.43882275132275134, "calib/avg_num_step_conf": 4.5078125, "calib/ece": 0.6959071729957806, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.6455696202531646, "calib/gap": -0.022377645502645538, "calib/mean_conf": 0.8876371308016878, "calib/mu_c": 0.8697916666666666, "calib/mu_w": 0.8921693121693122, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.6905063291139241, "calib/std_conf": 0.160682573488824, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2696.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 270.51953125, "completions/mean_terminated_length": 273.727294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0256, "grad_norm": 0.5515680909156799, "learning_rate": 4.888888888888889e-06, "loss": 0.0867, "num_tokens": 5791807.0, "reward": 0.6945167779922485, "reward_std": 0.4239427149295807, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.3015046715736389, "rewards/format_reward_step_strict": 0.86328125, "step": 24 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.9143667082111016e-08, "aux_brier/mean_group_std": 0.17796238549870536, "aux_brier/mean_r": 0.4622655060774658, "aux_brier/n_active_tok": 147.75, "aux_brier/n_groups": 9.375, "aux_brier/n_step_records": 36.9375, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.4699640445496798, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.6515300546448086, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.7295081967213115, "calib/gap": 0.01956502674734728, "calib/mean_conf": 0.9086612021857924, "calib/mu_c": 0.9231746031746033, "calib/mu_w": 0.903609576427256, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.6509972677595627, "calib/std_conf": 0.14453723644997096, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1727.0, "completions/max_terminated_length": 1727.0, "completions/mean_length": 262.4453125, "completions/mean_terminated_length": 262.4453125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.02666666666666667, "grad_norm": 0.38781338930130005, "learning_rate": 4.861111111111111e-06, "loss": 0.0489, "num_tokens": 5962217.0, "reward": 0.7675454020500183, "reward_std": 0.4628976583480835, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.32799413800239563, "rewards/format_reward_step_strict": 0.87890625, "step": 25 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.5465298583428044e-08, "aux_brier/mean_group_std": 0.19336496748253895, "aux_brier/mean_r": 0.5391585063432449, "aux_brier/n_active_tok": 151.125, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 37.78125, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.484878240377062, "calib/avg_num_step_conf": 4.7734375, "calib/ece": 0.7378645397489539, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.698744769874477, "calib/gap": 0.02188193898926427, "calib/mean_conf": 0.8968603556485357, "calib/mu_c": 0.9152631578947369, "calib/mu_w": 0.8933812189054726, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.7378645397489539, "calib/std_conf": 0.1523530721656261, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1840.0, "completions/max_terminated_length": 1840.0, "completions/mean_length": 272.98046875, "completions/mean_terminated_length": 274.0509948730469, "completions/min_length": 0.0, "completions/min_terminated_length": 19.0, "epoch": 0.027733333333333332, "grad_norm": 0.1853257268667221, "learning_rate": 4.833333333333333e-06, "loss": 0.0587, "num_tokens": 6137340.0, "reward": 0.6636569499969482, "reward_std": 0.38897761702537537, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.27962803840637207, "rewards/format_reward_step_strict": 0.8828125, "step": 26 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.605595465609122e-08, "aux_brier/mean_group_std": 0.2033091215678839, "aux_brier/mean_r": 0.5367957858957697, "aux_brier/n_active_tok": 150.5, "aux_brier/n_groups": 7.8125, "aux_brier/n_step_records": 37.625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5379593810444874, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.6637037037037037, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.6790123456790124, "calib/gap": 0.036373307543520195, "calib/mean_conf": 0.8900411522633744, "calib/mu_c": 0.9181818181818182, "calib/mu_w": 0.881808510638298, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.6637037037037037, "calib/std_conf": 0.16842562667874827, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1172.0, "completions/max_terminated_length": 1172.0, "completions/mean_length": 258.03515625, "completions/mean_terminated_length": 259.0470886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.0288, "grad_norm": 0.058313678950071335, "learning_rate": 4.805555555555556e-06, "loss": 0.0669, "num_tokens": 6308613.0, "reward": 0.7404499053955078, "reward_std": 0.4264795482158661, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.3367995023727417, "rewards/format_reward_step_strict": 0.875, "step": 27 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.604338225104975e-08, "aux_brier/mean_group_std": 0.20404063494544103, "aux_brier/mean_r": 0.5249942505915735, "aux_brier/n_active_tok": 142.875, "aux_brier/n_groups": 7.71875, "aux_brier/n_step_records": 35.71875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48313477289650036, "calib/avg_num_step_conf": 4.48828125, "calib/ece": 0.5925381526104417, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.6827309236947792, "calib/gap": 0.0019309009679820965, "calib/mean_conf": 0.8970361445783134, "calib/mu_c": 0.8983544303797468, "calib/mu_w": 0.8964235294117647, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.586152610441767, "calib/std_conf": 0.15173503478736503, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1941.0, "completions/max_terminated_length": 1941.0, "completions/mean_length": 261.7109375, "completions/mean_terminated_length": 261.7109375, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.029866666666666666, "grad_norm": 0.02496321126818657, "learning_rate": 4.777777777777778e-06, "loss": 0.029, "num_tokens": 6482555.0, "reward": 0.8744784593582153, "reward_std": 0.4500199556350708, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.4041638970375061, "rewards/format_reward_step_strict": 0.921875, "step": 28 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.02735930734843e-08, "aux_brier/mean_group_std": 0.18830558343603693, "aux_brier/mean_r": 0.5297287509927155, "aux_brier/n_active_tok": 154.5, "aux_brier/n_groups": 8.03125, "aux_brier/n_step_records": 38.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5072883053299211, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.7195238095238095, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.027288305329920992, "calib/mean_conf": 0.8901587301587301, "calib/mu_c": 0.9127906976744186, "calib/mu_w": 0.8855023923444976, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7195238095238095, "calib/std_conf": 0.1517097486011025, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2370.0, "completions/max_terminated_length": 2370.0, "completions/mean_length": 269.95703125, "completions/mean_terminated_length": 269.95703125, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.030933333333333334, "grad_norm": 0.028272457420825958, "learning_rate": 4.75e-06, "loss": 0.0117, "num_tokens": 6658792.0, "reward": 0.7271352410316467, "reward_std": 0.332688570022583, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.3147910237312317, "rewards/format_reward_step_strict": 0.9609375, "step": 29 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.530250077323551e-10, "aux_brier/mean_group_std": 0.22293842301646147, "aux_brier/mean_r": 0.5668534328950838, "aux_brier/n_active_tok": 150.375, "aux_brier/n_groups": 8.40625, "aux_brier/n_step_records": 37.59375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.539996363966912, "calib/avg_num_step_conf": 4.70703125, "calib/ece": 0.6471600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.604, "calib/gap": 0.02043177892918835, "calib/mean_conf": 0.8675600000000001, "calib/mu_c": 0.8833333333333335, "calib/mu_w": 0.8629015544041452, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.64336, "calib/std_conf": 0.1797644191713143, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1084.0, "completions/max_terminated_length": 1084.0, "completions/mean_length": 264.9921875, "completions/mean_terminated_length": 266.0314025878906, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.032, "grad_norm": 0.0495811365544796, "learning_rate": 4.722222222222222e-06, "loss": -0.0113, "num_tokens": 6833614.0, "reward": 0.7893297672271729, "reward_std": 0.3898700475692749, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.3682566285133362, "rewards/format_reward_step_strict": 0.94140625, "step": 30 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.4792719266475416e-10, "aux_brier/mean_group_std": 0.21893791036647947, "aux_brier/mean_r": 0.5746811340475313, "aux_brier/n_active_tok": 169.75, "aux_brier/n_groups": 9.46875, "aux_brier/n_step_records": 42.4375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4694078947368422, "calib/avg_num_step_conf": 5.35546875, "calib/ece": 0.6330040650406504, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.5975609756097561, "calib/gap": 0.04146390977443637, "calib/mean_conf": 0.8551178861788619, "calib/mu_c": 0.8871428571428572, "calib/mu_w": 0.8456789473684209, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.630239837398374, "calib/std_conf": 0.2011072761223544, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 708.0, "completions/max_terminated_length": 708.0, "completions/mean_length": 267.125, "completions/mean_terminated_length": 268.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.03306666666666667, "grad_norm": 0.11504989862442017, "learning_rate": 4.694444444444445e-06, "loss": -0.0066, "num_tokens": 7007910.0, "reward": 0.7742238640785217, "reward_std": 0.40484166145324707, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.38595783710479736, "rewards/format_reward_step_strict": 0.91796875, "step": 31 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.2000071736583138e-08, "aux_brier/mean_group_std": 0.2174628534439109, "aux_brier/mean_r": 0.6717214829466005, "aux_brier/n_active_tok": 152.125, "aux_brier/n_groups": 7.71875, "aux_brier/n_step_records": 38.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5363514719000892, "calib/avg_num_step_conf": 4.7578125, "calib/ece": 0.6176706827309237, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.5140562248995983, "calib/gap": 0.009185548617305694, "calib/mean_conf": 0.8457028112449798, "calib/mu_c": 0.8527118644067795, "calib/mu_w": 0.8435263157894738, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6132128514056225, "calib/std_conf": 0.1801862268251334, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2100.0, "completions/max_terminated_length": 2100.0, "completions/mean_length": 265.34375, "completions/mean_terminated_length": 265.34375, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.034133333333333335, "grad_norm": 0.22988779842853546, "learning_rate": 4.666666666666667e-06, "loss": 0.0257, "num_tokens": 7182542.0, "reward": 0.8147046566009521, "reward_std": 0.31555622816085815, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.40725624561309814, "rewards/format_reward_step_strict": 0.96484375, "step": 32 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.550209317810143e-09, "aux_brier/mean_group_std": 0.21457613000540993, "aux_brier/mean_r": 0.620726903961005, "aux_brier/n_active_tok": 166.75, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 41.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5519501278772377, "calib/avg_num_step_conf": 5.21484375, "calib/ece": 0.6388800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.504, "calib/gap": 0.014320119352088878, "calib/mean_conf": 0.82288, "calib/mu_c": 0.8345652173913045, "calib/mu_w": 0.8202450980392156, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6388800000000001, "calib/std_conf": 0.20745048951496836, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1842.0, "completions/max_terminated_length": 1842.0, "completions/mean_length": 298.94921875, "completions/mean_terminated_length": 300.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0352, "grad_norm": 0.16715602576732635, "learning_rate": 4.638888888888889e-06, "loss": -0.0156, "num_tokens": 7365945.0, "reward": 0.7601896524429321, "reward_std": 0.36198821663856506, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.3923211097717285, "rewards/format_reward_step_strict": 0.96484375, "step": 33 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.486240712780031e-09, "aux_brier/mean_group_std": 0.20610838412419127, "aux_brier/mean_r": 0.6879036340887245, "aux_brier/n_active_tok": 160.0, "aux_brier/n_groups": 8.6875, "aux_brier/n_step_records": 40.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4242567994939911, "calib/avg_num_step_conf": 5.03515625, "calib/ece": 0.5600787401574803, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.40551181102362205, "calib/gap": -0.05788583175205564, "calib/mean_conf": 0.7916535433070866, "calib/mu_c": 0.7492647058823529, "calib/mu_w": 0.8071505376344086, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.542007874015748, "calib/std_conf": 0.21696461404405903, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 576.0, "completions/max_terminated_length": 576.0, "completions/mean_length": 256.43359375, "completions/mean_terminated_length": 257.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.03626666666666667, "grad_norm": 0.3874254822731018, "learning_rate": 4.611111111111112e-06, "loss": -0.0033, "num_tokens": 7536704.0, "reward": 0.871552050113678, "reward_std": 0.4220452308654785, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.4549582004547119, "rewards/format_reward_step_strict": 0.9765625, "step": 34 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.5682556586682495e-08, "aux_brier/mean_group_std": 0.18346159782395655, "aux_brier/mean_r": 0.7323745290227998, "aux_brier/n_active_tok": 171.375, "aux_brier/n_groups": 9.15625, "aux_brier/n_step_records": 42.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5748663101604278, "calib/avg_num_step_conf": 5.3671875, "calib/ece": 0.5036111111111111, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.39285714285714285, "calib/gap": 0.04778856437679957, "calib/mean_conf": 0.7562301587301588, "calib/mu_c": 0.7916923076923077, "calib/mu_w": 0.7439037433155081, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.500952380952381, "calib/std_conf": 0.23134114377848292, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1912.0, "completions/max_terminated_length": 1912.0, "completions/mean_length": 301.78515625, "completions/mean_terminated_length": 302.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.037333333333333336, "grad_norm": 0.1867360919713974, "learning_rate": 4.583333333333333e-06, "loss": -0.0185, "num_tokens": 7723217.0, "reward": 0.8578856587409973, "reward_std": 0.42439985275268555, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.5096675753593445, "rewards/format_reward_step_strict": 0.953125, "step": 35 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5388134932958764e-09, "aux_brier/mean_group_std": 0.15764825389974663, "aux_brier/mean_r": 0.7963679494946774, "aux_brier/n_active_tok": 171.625, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 42.90625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4882842509603073, "calib/avg_num_step_conf": 5.3984375, "calib/ece": 0.32611111111111113, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2261904761904762, "calib/gap": -0.00959026888604364, "calib/mean_conf": 0.6472222222222224, "calib/mu_c": 0.6418181818181817, "calib/mu_w": 0.6514084507042254, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26841269841269844, "calib/std_conf": 0.2654380592825691, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1841.0, "completions/max_terminated_length": 1841.0, "completions/mean_length": 293.41796875, "completions/mean_terminated_length": 293.41796875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.0384, "grad_norm": 0.049407023936510086, "learning_rate": 4.555555555555556e-06, "loss": 0.0152, "num_tokens": 7901044.0, "reward": 1.0721511840820312, "reward_std": 0.42401590943336487, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6245421767234802, "rewards/format_reward_step_strict": 0.97265625, "step": 36 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.9009657634415653e-08, "aux_brier/mean_group_std": 0.15041828112538194, "aux_brier/mean_r": 0.8168662889313909, "aux_brier/n_active_tok": 169.5, "aux_brier/n_groups": 9.1875, "aux_brier/n_step_records": 42.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.505787037037037, "calib/avg_num_step_conf": 5.296875, "calib/ece": 0.4065217391304348, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.19367588932806323, "calib/gap": 0.011797288359788327, "calib/mean_conf": 0.6246245059288539, "calib/mu_c": 0.6334375, "calib/mu_w": 0.6216402116402117, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38909090909090915, "calib/std_conf": 0.2612382291393936, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1788.0, "completions/max_terminated_length": 1788.0, "completions/mean_length": 289.91015625, "completions/mean_terminated_length": 289.91015625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.039466666666666664, "grad_norm": 0.1267281323671341, "learning_rate": 4.527777777777778e-06, "loss": 0.0621, "num_tokens": 8082357.0, "reward": 0.8965669870376587, "reward_std": 0.3665044903755188, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.6018929481506348, "rewards/format_reward_step_strict": 0.984375, "step": 37 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2826837991752882e-07, "aux_brier/mean_group_std": 0.11248490733689714, "aux_brier/mean_r": 0.8767241568463079, "aux_brier/n_active_tok": 168.875, "aux_brier/n_groups": 9.25, "aux_brier/n_step_records": 42.21875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4858777997364954, "calib/avg_num_step_conf": 5.28125, "calib/ece": 0.31332, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.128, "calib/gap": -0.013852108036890631, "calib/mean_conf": 0.5486800000000001, "calib/mu_c": 0.5384848484848485, "calib/mu_w": 0.5523369565217391, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.299, "calib/std_conf": 0.26747907880804433, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2279.0, "completions/max_terminated_length": 2279.0, "completions/mean_length": 306.4921875, "completions/mean_terminated_length": 307.6941223144531, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.04053333333333333, "grad_norm": 0.2342425286769867, "learning_rate": 4.5e-06, "loss": 0.1016, "num_tokens": 8267707.0, "reward": 0.8923693895339966, "reward_std": 0.3634984791278839, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.6241652369499207, "rewards/format_reward_step_strict": 0.95703125, "step": 38 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.383111908394909e-07, "aux_brier/mean_group_std": 0.10895802280901819, "aux_brier/mean_r": 0.8861176370587436, "aux_brier/n_active_tok": 168.25, "aux_brier/n_groups": 9.21875, "aux_brier/n_step_records": 42.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4239121882130774, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.30739999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.152, "calib/gap": -0.08044141946651961, "calib/mean_conf": 0.52492, "calib/mu_c": 0.46732394366197194, "calib/mu_w": 0.5477653631284916, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27415999999999996, "calib/std_conf": 0.2909738709918813, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2291.0, "completions/max_terminated_length": 2291.0, "completions/mean_length": 319.078125, "completions/mean_terminated_length": 320.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.0416, "grad_norm": 0.3318597376346588, "learning_rate": 4.472222222222223e-06, "loss": 0.0696, "num_tokens": 8455479.0, "reward": 0.9113386869430542, "reward_std": 0.32969149947166443, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.6062921285629272, "rewards/format_reward_step_strict": 0.96484375, "step": 39 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.142651626986126e-07, "aux_brier/mean_group_std": 0.08402385434466757, "aux_brier/mean_r": 0.9106506398117968, "aux_brier/n_active_tok": 179.5, "aux_brier/n_groups": 9.8125, "aux_brier/n_step_records": 44.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48081481481481486, "calib/avg_num_step_conf": 5.609375, "calib/ece": 0.24349019607843136, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06666666666666667, "calib/gap": -0.01774444444444445, "calib/mean_conf": 0.48105882352941176, "calib/mu_c": 0.46853333333333336, "calib/mu_w": 0.4862777777777778, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21521568627450982, "calib/std_conf": 0.25725560736393005, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 329.99609375, "completions/mean_terminated_length": 331.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.042666666666666665, "grad_norm": 0.10303126275539398, "learning_rate": 4.444444444444444e-06, "loss": -0.0032, "num_tokens": 8646718.0, "reward": 0.9466276168823242, "reward_std": 0.3424081802368164, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.6693230867385864, "rewards/format_reward_step_strict": 0.97265625, "step": 40 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.811446523365291e-07, "aux_brier/mean_group_std": 0.11664949732272643, "aux_brier/mean_r": 0.8908425943823306, "aux_brier/n_active_tok": 173.875, "aux_brier/n_groups": 10.0, "aux_brier/n_step_records": 43.46875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44767369727047146, "calib/avg_num_step_conf": 5.4453125, "calib/ece": 0.2651574803149606, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06299212598425197, "calib/gap": -0.0529925558312655, "calib/mean_conf": 0.4272834645669291, "calib/mu_c": 0.40016129032258063, "calib/mu_w": 0.45315384615384613, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10212598425196848, "calib/std_conf": 0.25539552150291284, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2461.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 302.65234375, "completions/mean_terminated_length": 302.65234375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.04373333333333333, "grad_norm": 0.778255820274353, "learning_rate": 4.416666666666667e-06, "loss": 0.0236, "num_tokens": 8831445.0, "reward": 1.1339144706726074, "reward_std": 0.3703007698059082, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6450332403182983, "rewards/format_reward_step_strict": 0.9765625, "step": 41 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.661378361856759e-07, "aux_brier/mean_group_std": 0.08366851952716041, "aux_brier/mean_r": 0.9229096395041728, "aux_brier/n_active_tok": 163.25, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 40.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5341361464968153, "calib/avg_num_step_conf": 5.1484375, "calib/ece": 0.18921343873517785, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06719367588932806, "calib/gap": 0.021834925690021212, "calib/mean_conf": 0.35853359683794467, "calib/mu_c": 0.3720833333333333, "calib/mu_w": 0.3502484076433121, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08415019762845849, "calib/std_conf": 0.24942657528857123, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2349.0, "completions/max_terminated_length": 2349.0, "completions/mean_length": 281.30859375, "completions/mean_terminated_length": 281.30859375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.0448, "grad_norm": 0.7035812735557556, "learning_rate": 4.388888888888889e-06, "loss": 0.0451, "num_tokens": 9007828.0, "reward": 1.0298219919204712, "reward_std": 0.3289202153682709, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6974130272865295, "rewards/format_reward_step_strict": 0.9609375, "step": 42 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.6327332217079835e-08, "aux_brier/mean_group_std": 0.06886701483047437, "aux_brier/mean_r": 0.9339987424296959, "aux_brier/n_active_tok": 169.375, "aux_brier/n_groups": 9.53125, "aux_brier/n_step_records": 42.34375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47710437710437714, "calib/avg_num_step_conf": 5.29296875, "calib/ece": 0.19454901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": -0.023262626262626318, "calib/mean_conf": 0.3216078431372549, "calib/mu_c": 0.3065555555555555, "calib/mu_w": 0.32981818181818184, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08160784313725489, "calib/std_conf": 0.21682052562117374, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 311.1796875, "completions/mean_terminated_length": 312.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.04586666666666667, "grad_norm": 0.1377396136522293, "learning_rate": 4.361111111111112e-06, "loss": 0.0219, "num_tokens": 9192714.0, "reward": 1.0290229320526123, "reward_std": 0.3359209895133972, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7098422050476074, "rewards/format_reward_step_strict": 0.9921875, "step": 43 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.0762592037315564e-07, "aux_brier/mean_group_std": 0.07312426961250452, "aux_brier/mean_r": 0.9344066743447412, "aux_brier/n_active_tok": 179.625, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 44.90625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5467723090404557, "calib/avg_num_step_conf": 5.63671875, "calib/ece": 0.14820615796519412, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.0478416338055595, "calib/mean_conf": 0.27998661311914325, "calib/mu_c": 0.31207317073170726, "calib/mu_w": 0.26423153692614776, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04943775100401608, "calib/std_conf": 0.2175026602784408, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2244.0, "completions/max_terminated_length": 2244.0, "completions/mean_length": 348.29296875, "completions/mean_terminated_length": 348.29296875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.046933333333333334, "grad_norm": 0.6821126937866211, "learning_rate": 4.333333333333334e-06, "loss": 0.13, "num_tokens": 9388197.0, "reward": 0.9781696200370789, "reward_std": 0.2926007807254791, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.725178599357605, "rewards/format_reward_step_strict": 0.953125, "step": 44 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.175147795745017e-07, "aux_brier/mean_group_std": 0.06813884053215623, "aux_brier/mean_r": 0.9426513219949312, "aux_brier/n_active_tok": 163.0, "aux_brier/n_groups": 9.84375, "aux_brier/n_step_records": 40.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5320624546114743, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.21762151394422313, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": -0.0002480755265069079, "calib/mean_conf": 0.2528964143426295, "calib/mu_c": 0.2527283950617284, "calib/mu_w": 0.2529764705882353, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07390438247011953, "calib/std_conf": 0.22709503869677308, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1557.0, "completions/max_terminated_length": 1557.0, "completions/mean_length": 325.01953125, "completions/mean_terminated_length": 326.29412841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.048, "grad_norm": 0.309156596660614, "learning_rate": 4.305555555555556e-06, "loss": 0.0191, "num_tokens": 9576450.0, "reward": 0.9735827445983887, "reward_std": 0.3187885582447052, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6990187168121338, "rewards/format_reward_step_strict": 0.95703125, "step": 45 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.3089005636053344e-08, "aux_brier/mean_group_std": 0.084851303929517, "aux_brier/mean_r": 0.9288060641978739, "aux_brier/n_active_tok": 187.125, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 46.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5061767702496179, "calib/avg_num_step_conf": 5.84765625, "calib/ece": 0.24050980392156862, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": -0.013870351502801775, "calib/mean_conf": 0.2510980392156863, "calib/mu_c": 0.2428846153846154, "calib/mu_w": 0.25675496688741717, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.041882352941176475, "calib/std_conf": 0.2027796306920552, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 342.09765625, "completions/mean_terminated_length": 342.09765625, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.04906666666666667, "grad_norm": 0.16056883335113525, "learning_rate": 4.277777777777778e-06, "loss": 0.0167, "num_tokens": 9768795.0, "reward": 1.072223424911499, "reward_std": 0.2772214412689209, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6795187592506409, "rewards/format_reward_step_strict": 0.9921875, "step": 46 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.738972862657677e-07, "aux_brier/mean_group_std": 0.05094374030910097, "aux_brier/mean_r": 0.9550568071250548, "aux_brier/n_active_tok": 186.25, "aux_brier/n_groups": 11.625, "aux_brier/n_step_records": 46.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5255102040816326, "calib/avg_num_step_conf": 5.83984375, "calib/ece": 0.24277747035573122, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.011478907175773545, "calib/mean_conf": 0.22378379446640317, "calib/mu_c": 0.23081632653061226, "calib/mu_w": 0.21933741935483872, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.039604743083003956, "calib/std_conf": 0.18906239076821524, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1741.0, "completions/max_terminated_length": 1741.0, "completions/mean_length": 347.62890625, "completions/mean_terminated_length": 347.62890625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.050133333333333335, "grad_norm": 0.8119200468063354, "learning_rate": 4.25e-06, "loss": 0.0185, "num_tokens": 9963764.0, "reward": 1.0399562120437622, "reward_std": 0.31012624502182007, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6910749673843384, "rewards/format_reward_step_strict": 0.96875, "step": 47 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.002691074786682e-07, "aux_brier/mean_group_std": 0.06842534960035207, "aux_brier/mean_r": 0.947467223744788, "aux_brier/n_active_tok": 156.875, "aux_brier/n_groups": 9.3125, "aux_brier/n_step_records": 39.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4822554124417649, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.2117193675889328, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": -0.0060845437106056555, "calib/mean_conf": 0.17950592885375494, "calib/mu_c": 0.175561797752809, "calib/mu_w": 0.18164634146341466, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01972332015810277, "calib/std_conf": 0.15003245025945752, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1876.0, "completions/max_terminated_length": 1876.0, "completions/mean_length": 305.21875, "completions/mean_terminated_length": 305.21875, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.0512, "grad_norm": 0.05966079980134964, "learning_rate": 4.222222222222223e-06, "loss": 0.07, "num_tokens": 10145588.0, "reward": 1.0119268894195557, "reward_std": 0.28723055124282837, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.703957736492157, "rewards/format_reward_step_strict": 0.9765625, "step": 48 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.071487197722991e-07, "aux_brier/mean_group_std": 0.06095274874137357, "aux_brier/mean_r": 0.9523212046326821, "aux_brier/n_active_tok": 169.75, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 42.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5313121890547264, "calib/avg_num_step_conf": 5.3203125, "calib/ece": 0.34724409448818894, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": -0.01367910447761192, "calib/mean_conf": 0.16771653543307088, "calib/mu_c": 0.1605, "calib/mu_w": 0.17417910447761192, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02125984251968504, "calib/std_conf": 0.15717330309850466, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1641.0, "completions/max_terminated_length": 1641.0, "completions/mean_length": 318.90234375, "completions/mean_terminated_length": 320.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.05226666666666667, "grad_norm": 1.1238723993301392, "learning_rate": 4.194444444444445e-06, "loss": 0.0298, "num_tokens": 10331763.0, "reward": 1.1154522895812988, "reward_std": 0.28439438343048096, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6180590391159058, "rewards/format_reward_step_strict": 0.984375, "step": 49 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.9178487179472015e-06, "aux_brier/mean_group_std": 0.04917056897485339, "aux_brier/mean_r": 0.9605997027116365, "aux_brier/n_active_tok": 175.75, "aux_brier/n_groups": 11.25, "aux_brier/n_step_records": 43.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5150627087666225, "calib/avg_num_step_conf": 5.5078125, "calib/ece": 0.36686111111111114, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.013845906598600854, "calib/mean_conf": 0.16123412698412698, "calib/mu_c": 0.15414634146341463, "calib/mu_w": 0.1679922480620155, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02, "calib/std_conf": 0.15678436797620532, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1968.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 373.21875, "completions/mean_terminated_length": 373.21875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.05333333333333334, "grad_norm": 0.04442565515637398, "learning_rate": 4.166666666666667e-06, "loss": 0.0236, "num_tokens": 10532667.0, "reward": 1.1212639808654785, "reward_std": 0.3354077637195587, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6022437810897827, "rewards/format_reward_step_strict": 0.98046875, "step": 50 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.771473663123249e-07, "aux_brier/mean_group_std": 0.05024114148348355, "aux_brier/mean_r": 0.9622786543279463, "aux_brier/n_active_tok": 173.875, "aux_brier/n_groups": 11.6875, "aux_brier/n_step_records": 43.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.536133255557716, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.34056, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.004009981204225799, "calib/mean_conf": 0.13768, "calib/mu_c": 0.13545045045045045, "calib/mu_w": 0.13946043165467625, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01712, "calib/std_conf": 0.13789857722253698, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 361.27734375, "completions/mean_terminated_length": 361.27734375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.0544, "grad_norm": 0.008375815115869045, "learning_rate": 4.138888888888889e-06, "loss": 0.1256, "num_tokens": 10734450.0, "reward": 1.069167137145996, "reward_std": 0.3004983365535736, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6204181909561157, "rewards/format_reward_step_strict": 0.953125, "step": 51 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.0889351026731617e-07, "aux_brier/mean_group_std": 0.0628723493081243, "aux_brier/mean_r": 0.9589201670694388, "aux_brier/n_active_tok": 139.375, "aux_brier/n_groups": 7.4375, "aux_brier/n_step_records": 34.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6332369215291751, "calib/avg_num_step_conf": 4.36328125, "calib/ece": 0.44421259842519684, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0334771126760563, "calib/mean_conf": 0.12059055118110236, "calib/mu_c": 0.13535211267605632, "calib/mu_w": 0.10187500000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002874015748031496, "calib/std_conf": 0.09815244886301328, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1989.0, "completions/max_terminated_length": 1989.0, "completions/mean_length": 297.875, "completions/mean_terminated_length": 299.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.055466666666666664, "grad_norm": 0.06173436716198921, "learning_rate": 4.111111111111111e-06, "loss": 0.0032, "num_tokens": 10918658.0, "reward": 1.184843897819519, "reward_std": 0.28533023595809937, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5596883296966553, "rewards/format_reward_step_strict": 0.98046875, "step": 52 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.135251921640661e-07, "aux_brier/mean_group_std": 0.07898858012536986, "aux_brier/mean_r": 0.9440966234794856, "aux_brier/n_active_tok": 157.375, "aux_brier/n_groups": 10.40625, "aux_brier/n_step_records": 39.34375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4904507257448434, "calib/avg_num_step_conf": 4.9375, "calib/ece": 0.3560159362549801, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.011899032340208837, "calib/mean_conf": 0.1392828685258964, "calib/mu_c": 0.13302521008403362, "calib/mu_w": 0.14492424242424246, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.010597609561752989, "calib/std_conf": 0.134099786184467, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2318.0, "completions/max_terminated_length": 2318.0, "completions/mean_length": 351.9296875, "completions/mean_terminated_length": 353.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.05653333333333333, "grad_norm": 0.0809786394238472, "learning_rate": 4.083333333333334e-06, "loss": 0.0974, "num_tokens": 11114576.0, "reward": 1.0954921245574951, "reward_std": 0.2942587733268738, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5929058790206909, "rewards/format_reward_step_strict": 0.96484375, "step": 53 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.1636615188124217e-06, "aux_brier/mean_group_std": 0.06836824017019381, "aux_brier/mean_r": 0.9480927544465717, "aux_brier/n_active_tok": 159.75, "aux_brier/n_groups": 9.28125, "aux_brier/n_step_records": 39.9375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5389139139139139, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.44501960784313727, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.005407282282282294, "calib/mean_conf": 0.12854901960784315, "calib/mu_c": 0.13090277777777778, "calib/mu_w": 0.1254954954954955, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004431372549019606, "calib/std_conf": 0.10835966142679215, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 764.0, "completions/max_terminated_length": 764.0, "completions/mean_length": 300.27734375, "completions/mean_terminated_length": 301.4549255371094, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0576, "grad_norm": 0.09754743427038193, "learning_rate": 4.055555555555556e-06, "loss": 0.0099, "num_tokens": 11297679.0, "reward": 1.1941572427749634, "reward_std": 0.24160432815551758, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5500664114952087, "rewards/format_reward_step_strict": 0.98828125, "step": 54 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.122786133157867e-07, "aux_brier/mean_group_std": 0.07347715917728848, "aux_brier/mean_r": 0.9513396735274872, "aux_brier/n_active_tok": 151.25, "aux_brier/n_groups": 8.90625, "aux_brier/n_step_records": 37.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4379084967320262, "calib/avg_num_step_conf": 4.7265625, "calib/ece": 0.30817460317460316, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.029529411764705873, "calib/mean_conf": 0.12404761904761907, "calib/mu_c": 0.10647058823529414, "calib/mu_w": 0.136, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.013730158730158728, "calib/std_conf": 0.10409609854565302, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2460.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 327.328125, "completions/mean_terminated_length": 328.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.058666666666666666, "grad_norm": 0.017162779346108437, "learning_rate": 4.027777777777779e-06, "loss": 0.0401, "num_tokens": 11489299.0, "reward": 1.0469169616699219, "reward_std": 0.30747726559638977, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.640792965888977, "rewards/format_reward_step_strict": 0.9765625, "step": 55 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.792779739164587e-07, "aux_brier/mean_group_std": 0.058836280913051306, "aux_brier/mean_r": 0.9535270850290725, "aux_brier/n_active_tok": 180.25, "aux_brier/n_groups": 11.78125, "aux_brier/n_step_records": 45.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5298408925859907, "calib/avg_num_step_conf": 5.67578125, "calib/ece": 0.2851984126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.010427807486631052, "calib/mean_conf": 0.12043650793650794, "calib/mu_c": 0.12676767676767678, "calib/mu_w": 0.11633986928104573, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006388888888888888, "calib/std_conf": 0.10496548911554603, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 381.68359375, "completions/mean_terminated_length": 381.68359375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.05973333333333333, "grad_norm": 0.016328781843185425, "learning_rate": 4.000000000000001e-06, "loss": 0.0904, "num_tokens": 11693850.0, "reward": 1.035246729850769, "reward_std": 0.3158321976661682, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6644245386123657, "rewards/format_reward_step_strict": 0.96484375, "step": 56 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.82358970946784e-07, "aux_brier/mean_group_std": 0.04653501148296142, "aux_brier/mean_r": 0.9702314117980674, "aux_brier/n_active_tok": 161.75, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 40.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.46222639149468414, "calib/avg_num_step_conf": 5.12109375, "calib/ece": 0.39778656126482204, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010028767979987502, "calib/mean_conf": 0.1239525691699605, "calib/mu_c": 0.11907692307692308, "calib/mu_w": 0.12910569105691058, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003952569169960474, "calib/std_conf": 0.08564674739445256, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2237.0, "completions/max_terminated_length": 2237.0, "completions/mean_length": 339.53125, "completions/mean_terminated_length": 340.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.0608, "grad_norm": 0.02530394308269024, "learning_rate": 3.972222222222223e-06, "loss": 0.0016, "num_tokens": 11887562.0, "reward": 1.1466963291168213, "reward_std": 0.2759783864021301, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5789726376533508, "rewards/format_reward_step_strict": 0.98828125, "step": 57 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.034399596677176e-08, "aux_brier/mean_group_std": 0.0408035824071343, "aux_brier/mean_r": 0.9703351715369016, "aux_brier/n_active_tok": 166.125, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 41.53125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5653220951234197, "calib/avg_num_step_conf": 5.28125, "calib/ece": 0.2870819996000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.01128500167904209, "calib/mean_conf": 0.12459800040000002, "calib/mu_c": 0.13141414141414143, "calib/mu_w": 0.12012913973509934, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00784, "calib/std_conf": 0.1151718686933647, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2854.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 390.7890625, "completions/mean_terminated_length": 395.4229431152344, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.06186666666666667, "grad_norm": 0.030217565596103668, "learning_rate": 3.944444444444445e-06, "loss": 0.069, "num_tokens": 12093924.0, "reward": 1.0271323919296265, "reward_std": 0.3332974314689636, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6554046869277954, "rewards/format_reward_step_strict": 0.953125, "step": 58 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.827907669901492e-07, "aux_brier/mean_group_std": 0.049403470093003755, "aux_brier/mean_r": 0.9666100210440571, "aux_brier/n_active_tok": 165.125, "aux_brier/n_groups": 10.5625, "aux_brier/n_step_records": 41.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5309877955758963, "calib/avg_num_step_conf": 5.26953125, "calib/ece": 0.33872222222222226, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005707856598016786, "calib/mean_conf": 0.11794444444444445, "calib/mu_c": 0.1210701754385965, "calib/mu_w": 0.11536231884057971, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002142857142857143, "calib/std_conf": 0.08120763174272962, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 347.46484375, "completions/mean_terminated_length": 348.8274841308594, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.06293333333333333, "grad_norm": 0.018596315756440163, "learning_rate": 3.916666666666667e-06, "loss": 0.0075, "num_tokens": 12289123.0, "reward": 1.0893032550811768, "reward_std": 0.21987329423427582, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6228382587432861, "rewards/format_reward_step_strict": 0.9765625, "step": 59 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.710774881155348e-07, "aux_brier/mean_group_std": 0.04960393934987858, "aux_brier/mean_r": 0.965859396805132, "aux_brier/n_active_tok": 162.5, "aux_brier/n_groups": 10.09375, "aux_brier/n_step_records": 40.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47795260295260295, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.3368924302788844, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.012364672364672397, "calib/mean_conf": 0.10852589641434264, "calib/mu_c": 0.10148148148148148, "calib/mu_w": 0.11384615384615387, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.007569721115537848, "calib/std_conf": 0.06676218666595297, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2151.0, "completions/max_terminated_length": 2151.0, "completions/mean_length": 349.91796875, "completions/mean_terminated_length": 351.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.064, "grad_norm": 0.017143506556749344, "learning_rate": 3.88888888888889e-06, "loss": 0.0417, "num_tokens": 12487558.0, "reward": 1.0662939548492432, "reward_std": 0.3008437156677246, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6245508193969727, "rewards/format_reward_step_strict": 0.9765625, "step": 60 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5978284104156693e-07, "aux_brier/mean_group_std": 0.05777770298304682, "aux_brier/mean_r": 0.9561135121257793, "aux_brier/n_active_tok": 167.875, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 41.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6247787610619469, "calib/avg_num_step_conf": 5.24609375, "calib/ece": 0.42284584980237155, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.039938685208596725, "calib/mean_conf": 0.13723320158102767, "calib/mu_c": 0.15507142857142858, "calib/mu_w": 0.11513274336283186, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003359683794466403, "calib/std_conf": 0.11528386071746465, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 330.24609375, "completions/mean_terminated_length": 330.24609375, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.06506666666666666, "grad_norm": 0.03179638460278511, "learning_rate": 3.861111111111112e-06, "loss": 0.0734, "num_tokens": 12676165.0, "reward": 1.1858327388763428, "reward_std": 0.25070449709892273, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.579268753528595, "rewards/format_reward_step_strict": 0.98828125, "step": 61 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.46104220020294e-07, "aux_brier/mean_group_std": 0.0512468291876447, "aux_brier/mean_r": 0.9559081316517538, "aux_brier/n_active_tok": 179.5, "aux_brier/n_groups": 11.46875, "aux_brier/n_step_records": 44.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.53499740999741, "calib/avg_num_step_conf": 5.6796875, "calib/ece": 0.31529880478087646, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006859621859621828, "calib/mean_conf": 0.12183266932270917, "calib/mu_c": 0.12574074074074074, "calib/mu_w": 0.11888111888111891, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0034262948207171317, "calib/std_conf": 0.0939898479749637, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2379.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 384.77734375, "completions/mean_terminated_length": 386.2862854003906, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.06613333333333334, "grad_norm": 0.012848633341491222, "learning_rate": 3.833333333333334e-06, "loss": 0.0517, "num_tokens": 12881748.0, "reward": 1.0705244541168213, "reward_std": 0.27952396869659424, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6414726972579956, "rewards/format_reward_step_strict": 0.9765625, "step": 62 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4853741657216979e-06, "aux_brier/mean_group_std": 0.06036445125762814, "aux_brier/mean_r": 0.9578557489188202, "aux_brier/n_active_tok": 186.5, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 46.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5240845959595959, "calib/avg_num_step_conf": 5.84765625, "calib/ece": 0.34446825396825403, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038712121212121364, "calib/mean_conf": 0.13172222222222224, "calib/mu_c": 0.13375, "calib/mu_w": 0.12987878787878787, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07792468269322313, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2243.0, "completions/max_terminated_length": 2243.0, "completions/mean_length": 400.26953125, "completions/mean_terminated_length": 400.26953125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.0672, "grad_norm": 0.0221357773989439, "learning_rate": 3.8055555555555556e-06, "loss": 0.0555, "num_tokens": 13092857.0, "reward": 1.1173802614212036, "reward_std": 0.3128426671028137, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6179586052894592, "rewards/format_reward_step_strict": 0.98046875, "step": 63 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.004063940664219e-07, "aux_brier/mean_group_std": 0.04270907810887059, "aux_brier/mean_r": 0.9655679757900179, "aux_brier/n_active_tok": 179.0, "aux_brier/n_groups": 11.5625, "aux_brier/n_step_records": 44.75, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48725578988226165, "calib/avg_num_step_conf": 5.6171875, "calib/ece": 0.41305220883534144, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.008328373657653002, "calib/mean_conf": 0.12646586345381527, "calib/mu_c": 0.12251908396946563, "calib/mu_w": 0.13084745762711864, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006706827309236947, "calib/std_conf": 0.09800192522982618, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 395.33984375, "completions/mean_terminated_length": 396.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.06826666666666667, "grad_norm": 0.009988549165427685, "learning_rate": 3.777777777777778e-06, "loss": 0.0993, "num_tokens": 13297840.0, "reward": 1.128638744354248, "reward_std": 0.3070526719093323, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5614300966262817, "rewards/format_reward_step_strict": 0.953125, "step": 64 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7370756747459026e-06, "aux_brier/mean_group_std": 0.04716174586798315, "aux_brier/mean_r": 0.9679136611818253, "aux_brier/n_active_tok": 152.0, "aux_brier/n_groups": 9.125, "aux_brier/n_step_records": 38.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6479689922480621, "calib/avg_num_step_conf": 4.7734375, "calib/ece": 0.36543307086614163, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.041241550387596915, "calib/mean_conf": 0.13409448818897637, "calib/mu_c": 0.15504, "calib/mu_w": 0.1137984496124031, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003700787401574803, "calib/std_conf": 0.10612987386735297, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1045.0, "completions/max_terminated_length": 1045.0, "completions/mean_length": 303.1484375, "completions/mean_terminated_length": 304.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.06933333333333333, "grad_norm": 0.016148686408996582, "learning_rate": 3.7500000000000005e-06, "loss": 0.0106, "num_tokens": 13480470.0, "reward": 1.1338117122650146, "reward_std": 0.21589995920658112, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6211844086647034, "rewards/format_reward_step_strict": 0.98046875, "step": 65 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5480337049178505e-06, "aux_brier/mean_group_std": 0.05333084433954547, "aux_brier/mean_r": 0.9594576144111381, "aux_brier/n_active_tok": 182.0, "aux_brier/n_groups": 12.84375, "aux_brier/n_step_records": 45.5, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49497536945812814, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.296932, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.016251888341543508, "calib/mean_conf": 0.14818800000000001, "calib/mu_c": 0.13876190476190478, "calib/mu_w": 0.1550137931034483, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.012559999999999998, "calib/std_conf": 0.1054997282271381, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2272.0, "completions/max_terminated_length": 2272.0, "completions/mean_length": 414.62109375, "completions/mean_terminated_length": 416.2470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0704, "grad_norm": 0.022092042490839958, "learning_rate": 3.7222222222222225e-06, "loss": 0.0481, "num_tokens": 13692965.0, "reward": 1.053545355796814, "reward_std": 0.3082756996154785, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6438689231872559, "rewards/format_reward_step_strict": 0.96484375, "step": 66 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.39356148585324e-07, "aux_brier/mean_group_std": 0.05352900282085929, "aux_brier/mean_r": 0.9607603994714087, "aux_brier/n_active_tok": 171.75, "aux_brier/n_groups": 10.8125, "aux_brier/n_step_records": 42.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5314205955334987, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.3517322834645669, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.0024491315136476177, "calib/mean_conf": 0.1581889763779528, "calib/mu_c": 0.15693548387096776, "calib/mu_w": 0.15938461538461537, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010866141732283466, "calib/std_conf": 0.11208066079457707, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2239.0, "completions/max_terminated_length": 2239.0, "completions/mean_length": 381.91015625, "completions/mean_terminated_length": 381.91015625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.07146666666666666, "grad_norm": 0.03895385190844536, "learning_rate": 3.694444444444445e-06, "loss": 0.0618, "num_tokens": 13895742.0, "reward": 1.1341534852981567, "reward_std": 0.19069793820381165, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6225515604019165, "rewards/format_reward_step_strict": 0.98828125, "step": 67 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.884196547205732e-07, "aux_brier/mean_group_std": 0.03522157067945391, "aux_brier/mean_r": 0.9724513701694049, "aux_brier/n_active_tok": 178.0, "aux_brier/n_groups": 13.0625, "aux_brier/n_step_records": 44.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4138558201058201, "calib/avg_num_step_conf": 5.57421875, "calib/ece": 0.2884738955823293, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": -0.03465674603174604, "calib/mean_conf": 0.16156626506024094, "calib/mu_c": 0.14152380952380952, "calib/mu_w": 0.17618055555555556, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014176706827309236, "calib/std_conf": 0.11302857364554952, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2446.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 377.39453125, "completions/mean_terminated_length": 377.39453125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.07253333333333334, "grad_norm": 0.014074550941586494, "learning_rate": 3.6666666666666666e-06, "loss": 0.1011, "num_tokens": 14096443.0, "reward": 1.0547256469726562, "reward_std": 0.27158546447753906, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6407777070999146, "rewards/format_reward_step_strict": 0.96875, "step": 68 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.912274668977812e-08, "aux_brier/mean_group_std": 0.060206578719245854, "aux_brier/mean_r": 0.9519198334044197, "aux_brier/n_active_tok": 166.75, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 41.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4611863313990973, "calib/avg_num_step_conf": 5.33203125, "calib/ece": 0.29003984063745014, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.026959381044487446, "calib/mean_conf": 0.18478087649402392, "calib/mu_c": 0.1696363636363636, "calib/mu_w": 0.19659574468085106, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.01828685258964144, "calib/std_conf": 0.11722323424237922, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2322.0, "completions/max_terminated_length": 2322.0, "completions/mean_length": 413.578125, "completions/mean_terminated_length": 415.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.0736, "grad_norm": 0.045668672770261765, "learning_rate": 3.638888888888889e-06, "loss": 0.0362, "num_tokens": 14306815.0, "reward": 1.0803816318511963, "reward_std": 0.2846702039241791, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6418390274047852, "rewards/format_reward_step_strict": 0.97265625, "step": 69 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.839567105755725e-07, "aux_brier/mean_group_std": 0.03887261264810001, "aux_brier/mean_r": 0.9667606420677022, "aux_brier/n_active_tok": 166.0, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 41.5, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5257194728348574, "calib/avg_num_step_conf": 5.24609375, "calib/ece": 0.26608906882591093, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008900349650349637, "calib/mean_conf": 0.16071255060728748, "calib/mu_c": 0.1658653846153846, "calib/mu_w": 0.15696503496503497, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.002874493927125506, "calib/std_conf": 0.1002256682657909, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 441.3046875, "completions/mean_terminated_length": 441.3046875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.07466666666666667, "grad_norm": 0.01115515735000372, "learning_rate": 3.6111111111111115e-06, "loss": 0.1459, "num_tokens": 14526781.0, "reward": 1.0445696115493774, "reward_std": 0.2699681520462036, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6548410058021545, "rewards/format_reward_step_strict": 0.94921875, "step": 70 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.424466776540962e-07, "aux_brier/mean_group_std": 0.052839652499730085, "aux_brier/mean_r": 0.947085518634407, "aux_brier/n_active_tok": 172.25, "aux_brier/n_groups": 10.0, "aux_brier/n_step_records": 43.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49413265306122456, "calib/avg_num_step_conf": 5.3828125, "calib/ece": 0.27218253968253964, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01194642857142858, "calib/mean_conf": 0.22726190476190478, "calib/mu_c": 0.22062500000000002, "calib/mu_w": 0.2325714285714286, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0275, "calib/std_conf": 0.13971925811582087, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1807.0, "completions/max_terminated_length": 1807.0, "completions/mean_length": 397.23828125, "completions/mean_terminated_length": 397.23828125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.07573333333333333, "grad_norm": 0.024247240275144577, "learning_rate": 3.5833333333333335e-06, "loss": 0.0572, "num_tokens": 14732882.0, "reward": 1.091394305229187, "reward_std": 0.3131329417228699, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6624523401260376, "rewards/format_reward_step_strict": 0.9765625, "step": 71 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7193469714626985e-07, "aux_brier/mean_group_std": 0.04157498335602871, "aux_brier/mean_r": 0.9612437829135265, "aux_brier/n_active_tok": 177.25, "aux_brier/n_groups": 9.96875, "aux_brier/n_step_records": 44.3125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5350649350649351, "calib/avg_num_step_conf": 5.5390625, "calib/ece": 0.23332, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005688311688311687, "calib/mean_conf": 0.23036, "calib/mu_c": 0.23354545454545453, "calib/mu_w": 0.22785714285714284, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.011840000000000003, "calib/std_conf": 0.11927560689428497, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2397.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 377.12109375, "completions/mean_terminated_length": 380.0905456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.0768, "grad_norm": 0.023552026599645615, "learning_rate": 3.555555555555556e-06, "loss": 0.0545, "num_tokens": 14933833.0, "reward": 1.0884344577789307, "reward_std": 0.2788498103618622, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6818628907203674, "rewards/format_reward_step_strict": 0.9765625, "step": 72 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.15556284272256e-07, "aux_brier/mean_group_std": 0.058614386723719145, "aux_brier/mean_r": 0.9473025581278537, "aux_brier/n_active_tok": 174.875, "aux_brier/n_groups": 9.1875, "aux_brier/n_step_records": 43.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47384085213032584, "calib/avg_num_step_conf": 5.48828125, "calib/ece": 0.318503937007874, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014969924812030161, "calib/mean_conf": 0.24543307086614172, "calib/mu_c": 0.23871428571428568, "calib/mu_w": 0.25368421052631585, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006377952755905511, "calib/std_conf": 0.11492041686925968, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2148.0, "completions/max_terminated_length": 2148.0, "completions/mean_length": 359.2578125, "completions/mean_terminated_length": 360.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.07786666666666667, "grad_norm": 0.023432835936546326, "learning_rate": 3.5277777777777784e-06, "loss": 0.0172, "num_tokens": 15132835.0, "reward": 1.1985793113708496, "reward_std": 0.3204796016216278, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6302546858787537, "rewards/format_reward_step_strict": 0.98828125, "step": 73 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.805087781585172e-07, "aux_brier/mean_group_std": 0.033791970163948824, "aux_brier/mean_r": 0.9606275891011922, "aux_brier/n_active_tok": 174.5, "aux_brier/n_groups": 9.75, "aux_brier/n_step_records": 43.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4802073210236475, "calib/avg_num_step_conf": 5.61328125, "calib/ece": 0.2040873015873016, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.0185986394557823, "calib/mean_conf": 0.2598015873015873, "calib/mu_c": 0.24895238095238095, "calib/mu_w": 0.26755102040816325, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02361111111111111, "calib/std_conf": 0.1415796662739476, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2384.0, "completions/max_terminated_length": 2384.0, "completions/mean_length": 380.80859375, "completions/mean_terminated_length": 382.3019714355469, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.07893333333333333, "grad_norm": 0.03156125918030739, "learning_rate": 3.5e-06, "loss": 0.0388, "num_tokens": 15334250.0, "reward": 1.0754096508026123, "reward_std": 0.3259609639644623, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6922637224197388, "rewards/format_reward_step_strict": 0.984375, "step": 74 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.679853244457746e-07, "aux_brier/mean_group_std": 0.06551556186302822, "aux_brier/mean_r": 0.9260156059854672, "aux_brier/n_active_tok": 168.25, "aux_brier/n_groups": 9.09375, "aux_brier/n_step_records": 42.0625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5486648606811146, "calib/avg_num_step_conf": 5.2578125, "calib/ece": 0.33153543307086614, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.012025283797729658, "calib/mean_conf": 0.29003937007874014, "calib/mu_c": 0.2948684210526316, "calib/mu_w": 0.28284313725490196, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011574803149606298, "calib/std_conf": 0.13342991327690923, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2356.0, "completions/max_terminated_length": 2356.0, "completions/mean_length": 366.9296875, "completions/mean_terminated_length": 366.9296875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.08, "grad_norm": 0.014250561594963074, "learning_rate": 3.4722222222222224e-06, "loss": 0.0215, "num_tokens": 15532936.0, "reward": 1.2556159496307373, "reward_std": 0.28199487924575806, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6474636793136597, "rewards/format_reward_step_strict": 0.9921875, "step": 75 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.577658853615162e-07, "aux_brier/mean_group_std": 0.047672911206574485, "aux_brier/mean_r": 0.938165863818875, "aux_brier/n_active_tok": 162.5, "aux_brier/n_groups": 10.125, "aux_brier/n_step_records": 40.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4636904761904762, "calib/avg_num_step_conf": 5.078125, "calib/ece": 0.28976284584980233, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.020929197994987436, "calib/mean_conf": 0.2715810276679842, "calib/mu_c": 0.26165413533834586, "calib/mu_w": 0.2825833333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017826086956521738, "calib/std_conf": 0.12108481313553088, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 378.9609375, "completions/mean_terminated_length": 378.9609375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.08106666666666666, "grad_norm": 0.05815184861421585, "learning_rate": 3.444444444444445e-06, "loss": 0.0521, "num_tokens": 15733006.0, "reward": 1.1769827604293823, "reward_std": 0.24581657350063324, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.653243362903595, "rewards/format_reward_step_strict": 0.98828125, "step": 76 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1953953993337763e-07, "aux_brier/mean_group_std": 0.0652622002784243, "aux_brier/mean_r": 0.924452100722223, "aux_brier/n_active_tok": 159.0, "aux_brier/n_groups": 9.625, "aux_brier/n_step_records": 39.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.46071878940731403, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.2793650793650793, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011770491803278715, "calib/mean_conf": 0.2626984126984127, "calib/mu_c": 0.25699999999999995, "calib/mu_w": 0.26877049180327867, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.013095238095238097, "calib/std_conf": 0.13197169428600425, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 365.3828125, "completions/mean_terminated_length": 366.8157043457031, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.08213333333333334, "grad_norm": 0.03915157914161682, "learning_rate": 3.416666666666667e-06, "loss": 0.0536, "num_tokens": 15931208.0, "reward": 1.1603152751922607, "reward_std": 0.29630452394485474, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6490738391876221, "rewards/format_reward_step_strict": 0.98046875, "step": 77 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.553938307561012e-07, "aux_brier/mean_group_std": 0.0469306941978666, "aux_brier/mean_r": 0.9418391258096338, "aux_brier/n_active_tok": 161.875, "aux_brier/n_groups": 9.46875, "aux_brier/n_step_records": 40.46875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5845184227537169, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.2643775100401607, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03356625727213963, "calib/mean_conf": 0.2926506024096386, "calib/mu_c": 0.30869230769230765, "calib/mu_w": 0.275126050420168, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.01746987951807229, "calib/std_conf": 0.129003760762242, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2275.0, "completions/max_terminated_length": 2275.0, "completions/mean_length": 433.18359375, "completions/mean_terminated_length": 434.88238525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0832, "grad_norm": 0.02204030193388462, "learning_rate": 3.3888888888888893e-06, "loss": 0.0954, "num_tokens": 16150127.0, "reward": 1.1609346866607666, "reward_std": 0.35593631863594055, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6749886870384216, "rewards/format_reward_step_strict": 0.96875, "step": 78 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4770025424937216e-08, "aux_brier/mean_group_std": 0.04229294099187673, "aux_brier/mean_r": 0.9357093332584171, "aux_brier/n_active_tok": 157.875, "aux_brier/n_groups": 9.5, "aux_brier/n_step_records": 39.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5722112780103423, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.2337254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.03830583600098497, "calib/mean_conf": 0.2970980392156863, "calib/mu_c": 0.31572519083969464, "calib/mu_w": 0.27741935483870966, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008549019607843135, "calib/std_conf": 0.15410529312357946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 381.42578125, "completions/mean_terminated_length": 382.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.08426666666666667, "grad_norm": 0.02219904214143753, "learning_rate": 3.3611111111111117e-06, "loss": 0.0015, "num_tokens": 16354148.0, "reward": 1.179839849472046, "reward_std": 0.24919675290584564, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6959218382835388, "rewards/format_reward_step_strict": 0.98828125, "step": 79 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.2483069001851845e-08, "aux_brier/mean_group_std": 0.05762175924924037, "aux_brier/mean_r": 0.918613112171638, "aux_brier/n_active_tok": 174.0, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 43.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.516119109142365, "calib/avg_num_step_conf": 5.4453125, "calib/ece": 0.18933333333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018899963086010385, "calib/mean_conf": 0.33968627450980393, "calib/mu_c": 0.3406201550387597, "calib/mu_w": 0.33873015873015866, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.011568627450980393, "calib/std_conf": 0.13014885748914035, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2481.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 369.08203125, "completions/mean_terminated_length": 369.08203125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.08533333333333333, "grad_norm": 0.03751935809850693, "learning_rate": 3.3333333333333333e-06, "loss": 0.0446, "num_tokens": 16550793.0, "reward": 1.171188473701477, "reward_std": 0.31861335039138794, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7003788948059082, "rewards/format_reward_step_strict": 0.984375, "step": 80 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.201769834126349e-07, "aux_brier/mean_group_std": 0.05518780480113048, "aux_brier/mean_r": 0.9199615752677882, "aux_brier/n_active_tok": 171.0, "aux_brier/n_groups": 11.4375, "aux_brier/n_step_records": 42.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4610793650793651, "calib/avg_num_step_conf": 5.3828125, "calib/ece": 0.27350597609561755, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.034043809523809476, "calib/mean_conf": 0.3175697211155379, "calib/mu_c": 0.30047999999999997, "calib/mu_w": 0.33452380952380945, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04653386454183267, "calib/std_conf": 0.14240139567280813, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 413.1484375, "completions/mean_terminated_length": 413.1484375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.0864, "grad_norm": 0.0270853154361248, "learning_rate": 3.3055555555555558e-06, "loss": 0.1317, "num_tokens": 16762807.0, "reward": 1.1423234939575195, "reward_std": 0.2549367845058441, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.663043737411499, "rewards/format_reward_step_strict": 0.9765625, "step": 81 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1273195890670884e-07, "aux_brier/mean_group_std": 0.04399663749933305, "aux_brier/mean_r": 0.930897226417169, "aux_brier/n_active_tok": 160.625, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 40.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47119078104993595, "calib/avg_num_step_conf": 5.16796875, "calib/ece": 0.2869047619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.020683738796414775, "calib/mean_conf": 0.31325396825396823, "calib/mu_c": 0.3042253521126761, "calib/mu_w": 0.32490909090909087, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.01833333333333333, "calib/std_conf": 0.14968660045999052, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2347.0, "completions/max_terminated_length": 2347.0, "completions/mean_length": 351.58984375, "completions/mean_terminated_length": 354.3582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.08746666666666666, "grad_norm": 0.030250275507569313, "learning_rate": 3.277777777777778e-06, "loss": 0.0619, "num_tokens": 16958366.0, "reward": 1.2067850828170776, "reward_std": 0.26575493812561035, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6474527716636658, "rewards/format_reward_step_strict": 0.98046875, "step": 82 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.1769497393918655e-08, "aux_brier/mean_group_std": 0.054236977676692506, "aux_brier/mean_r": 0.9155251421432733, "aux_brier/n_active_tok": 177.5, "aux_brier/n_groups": 10.5, "aux_brier/n_step_records": 44.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4310899418532723, "calib/avg_num_step_conf": 5.55078125, "calib/ece": 0.23168627450980392, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.04061610788073744, "calib/mean_conf": 0.33843137254901967, "calib/mu_c": 0.3166101694915254, "calib/mu_w": 0.3572262773722628, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05368627450980391, "calib/std_conf": 0.15597099465165282, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1803.0, "completions/max_terminated_length": 1803.0, "completions/mean_length": 412.29296875, "completions/mean_terminated_length": 413.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.08853333333333334, "grad_norm": 0.009271882474422455, "learning_rate": 3.2500000000000002e-06, "loss": 0.0311, "num_tokens": 17171177.0, "reward": 1.1272363662719727, "reward_std": 0.2718566954135895, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6886332035064697, "rewards/format_reward_step_strict": 0.98828125, "step": 83 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.938602060951646e-07, "aux_brier/mean_group_std": 0.055110220448419386, "aux_brier/mean_r": 0.9163679464388175, "aux_brier/n_active_tok": 159.0, "aux_brier/n_groups": 9.78125, "aux_brier/n_step_records": 39.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5751875, "calib/avg_num_step_conf": 4.98828125, "calib/ece": 0.21980237154150195, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.023636875, "calib/mean_conf": 0.31988142292490124, "calib/mu_c": 0.33184, "calib/mu_w": 0.308203125, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.022806324110671936, "calib/std_conf": 0.15663073619345894, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1058.0, "completions/max_terminated_length": 1058.0, "completions/mean_length": 336.3046875, "completions/mean_terminated_length": 338.9527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0896, "grad_norm": 0.013896271586418152, "learning_rate": 3.2222222222222227e-06, "loss": -0.0091, "num_tokens": 17363191.0, "reward": 1.1473292112350464, "reward_std": 0.34168779850006104, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6986917853355408, "rewards/format_reward_step_strict": 0.96875, "step": 84 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.499322417388484e-07, "aux_brier/mean_group_std": 0.059883257192061376, "aux_brier/mean_r": 0.9146038676776481, "aux_brier/n_active_tok": 160.125, "aux_brier/n_groups": 10.125, "aux_brier/n_step_records": 40.03125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4722751524390244, "calib/avg_num_step_conf": 5.00390625, "calib/ece": 0.2156972111553785, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.020544334349593407, "calib/mean_conf": 0.32454183266932274, "calib/mu_c": 0.3140650406504066, "calib/mu_w": 0.334609375, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0250996015936255, "calib/std_conf": 0.13921553570453937, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2413.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 398.296875, "completions/mean_terminated_length": 398.296875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.09066666666666667, "grad_norm": 0.06122644990682602, "learning_rate": 3.1944444444444443e-06, "loss": 0.0907, "num_tokens": 17572979.0, "reward": 1.1405842304229736, "reward_std": 0.22327688336372375, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6795241832733154, "rewards/format_reward_step_strict": 0.98046875, "step": 85 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1265720201159013e-06, "aux_brier/mean_group_std": 0.056234676397725655, "aux_brier/mean_r": 0.9039048247060489, "aux_brier/n_active_tok": 164.875, "aux_brier/n_groups": 10.8125, "aux_brier/n_step_records": 41.21875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5933844916145801, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.1508203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": 0.0422891268024011, "calib/mean_conf": 0.33628906249999996, "calib/mu_c": 0.35991150442477876, "calib/mu_w": 0.31762237762237766, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.022851562500000002, "calib/std_conf": 0.15439871540226976, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1791.0, "completions/max_terminated_length": 1791.0, "completions/mean_length": 365.703125, "completions/mean_terminated_length": 367.13726806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.09173333333333333, "grad_norm": 0.02597670443356037, "learning_rate": 3.1666666666666667e-06, "loss": -0.0123, "num_tokens": 17772111.0, "reward": 1.122349739074707, "reward_std": 0.2914327383041382, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7393988370895386, "rewards/format_reward_step_strict": 0.9921875, "step": 86 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.160669021009312e-07, "aux_brier/mean_group_std": 0.06265765768392766, "aux_brier/mean_r": 0.9199099695836886, "aux_brier/n_active_tok": 138.5, "aux_brier/n_groups": 8.375, "aux_brier/n_step_records": 34.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5198178721174005, "calib/avg_num_step_conf": 4.39453125, "calib/ece": 0.33150823529411766, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008580227987421418, "calib/mean_conf": 0.30060627450980393, "calib/mu_c": 0.3038364779874214, "calib/mu_w": 0.29525625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004292549019607843, "calib/std_conf": 0.11487349273518041, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 330.64453125, "completions/mean_terminated_length": 330.64453125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.0928, "grad_norm": 0.05910065397620201, "learning_rate": 3.138888888888889e-06, "loss": 0.02, "num_tokens": 17962252.0, "reward": 1.2766623497009277, "reward_std": 0.290641725063324, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6457116603851318, "rewards/format_reward_step_strict": 0.98828125, "step": 87 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.17818114281954e-07, "aux_brier/mean_group_std": 0.05809240895458697, "aux_brier/mean_r": 0.9106669037827064, "aux_brier/n_active_tok": 152.0, "aux_brier/n_groups": 9.15625, "aux_brier/n_step_records": 38.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5666976744186046, "calib/avg_num_step_conf": 4.796875, "calib/ece": 0.17496062992125985, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.04075162790697673, "calib/mean_conf": 0.33354330708661417, "calib/mu_c": 0.35424, "calib/mu_w": 0.31348837209302327, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00818897637795276, "calib/std_conf": 0.1618788677052555, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 389.703125, "completions/mean_terminated_length": 389.703125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.09386666666666667, "grad_norm": 0.011103942058980465, "learning_rate": 3.1111111111111116e-06, "loss": 0.0879, "num_tokens": 18171864.0, "reward": 1.1568810939788818, "reward_std": 0.286232054233551, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.713461697101593, "rewards/format_reward_step_strict": 0.98046875, "step": 88 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.609432654045236e-07, "aux_brier/mean_group_std": 0.047199897622080135, "aux_brier/mean_r": 0.9364309733206839, "aux_brier/n_active_tok": 142.125, "aux_brier/n_groups": 8.84375, "aux_brier/n_step_records": 35.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5249311811811812, "calib/avg_num_step_conf": 4.46484375, "calib/ece": 0.17866666666666664, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00390015015015005, "calib/mean_conf": 0.3057254901960785, "calib/mu_c": 0.30792792792792784, "calib/mu_w": 0.3040277777777778, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.024549019607843118, "calib/std_conf": 0.1111198043504278, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1660.0, "completions/max_terminated_length": 1660.0, "completions/mean_length": 365.609375, "completions/mean_terminated_length": 367.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.09493333333333333, "grad_norm": 0.04293569177389145, "learning_rate": 3.0833333333333336e-06, "loss": 0.0361, "num_tokens": 18374348.0, "reward": 1.107604742050171, "reward_std": 0.23909682035446167, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7194820642471313, "rewards/format_reward_step_strict": 0.98828125, "step": 89 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.5871090816170783e-07, "aux_brier/mean_group_std": 0.06883439678714949, "aux_brier/mean_r": 0.8987536353359264, "aux_brier/n_active_tok": 156.75, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 39.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5296717171717171, "calib/avg_num_step_conf": 4.8984375, "calib/ece": 0.25271653543307077, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.016273989898989927, "calib/mean_conf": 0.3317716535433071, "calib/mu_c": 0.33881944444444445, "calib/mu_w": 0.3225454545454545, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008779527559055117, "calib/std_conf": 0.1420727156047671, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 364.3046875, "completions/mean_terminated_length": 364.3046875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.096, "grad_norm": 0.03275603801012039, "learning_rate": 3.055555555555556e-06, "loss": -0.0051, "num_tokens": 18570930.0, "reward": 1.2289987802505493, "reward_std": 0.2162112593650818, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6816199421882629, "rewards/format_reward_step_strict": 0.9921875, "step": 90 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.288571167134549e-07, "aux_brier/mean_group_std": 0.04189549822299543, "aux_brier/mean_r": 0.9120065562343892, "aux_brier/n_active_tok": 172.625, "aux_brier/n_groups": 12.28125, "aux_brier/n_step_records": 43.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48371140516698174, "calib/avg_num_step_conf": 5.7265625, "calib/ece": 0.2661660079051384, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.01897101449275368, "calib/mean_conf": 0.33304347826086955, "calib/mu_c": 0.3244202898550725, "calib/mu_w": 0.34339130434782617, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.026877470355731226, "calib/std_conf": 0.14008970194791004, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2414.0, "completions/max_terminated_length": 2414.0, "completions/mean_length": 392.77734375, "completions/mean_terminated_length": 394.3176574707031, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.09706666666666666, "grad_norm": 0.015519512817263603, "learning_rate": 3.0277777777777776e-06, "loss": -0.0363, "num_tokens": 18779193.0, "reward": 1.1949305534362793, "reward_std": 0.2625961899757385, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6625351309776306, "rewards/format_reward_step_strict": 0.98046875, "step": 91 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1462448890053878e-07, "aux_brier/mean_group_std": 0.048035465417402626, "aux_brier/mean_r": 0.9297901756865723, "aux_brier/n_active_tok": 136.625, "aux_brier/n_groups": 9.15625, "aux_brier/n_step_records": 34.15625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.583395942900075, "calib/avg_num_step_conf": 4.26953125, "calib/ece": 0.2284584980237154, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.03573002754820942, "calib/mean_conf": 0.30806324110671934, "calib/mu_c": 0.3251515151515152, "calib/mu_w": 0.28942148760330577, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007391304347826086, "calib/std_conf": 0.14108488443899017, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 322.94140625, "completions/mean_terminated_length": 324.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.09813333333333334, "grad_norm": 0.13573399186134338, "learning_rate": 3e-06, "loss": 0.032, "num_tokens": 18968586.0, "reward": 1.1833921670913696, "reward_std": 0.2916836142539978, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6945062875747681, "rewards/format_reward_step_strict": 0.98828125, "step": 92 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.201434862822651e-07, "aux_brier/mean_group_std": 0.049988126099442164, "aux_brier/mean_r": 0.9079618060813863, "aux_brier/n_active_tok": 172.375, "aux_brier/n_groups": 11.125, "aux_brier/n_step_records": 43.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4173543385846461, "calib/avg_num_step_conf": 5.4296875, "calib/ece": 0.24845849802371545, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.052819454863715865, "calib/mean_conf": 0.35581027667984194, "calib/mu_c": 0.32992248062015506, "calib/mu_w": 0.3827419354838709, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.047193675889328074, "calib/std_conf": 0.165438199933258, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2390.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 397.484375, "completions/mean_terminated_length": 400.6141662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0992, "grad_norm": 0.03095397725701332, "learning_rate": 2.9722222222222225e-06, "loss": 0.0175, "num_tokens": 19176118.0, "reward": 1.1613082885742188, "reward_std": 0.2993322014808655, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6608585715293884, "rewards/format_reward_step_strict": 0.984375, "step": 93 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.641496773507889e-07, "aux_brier/mean_group_std": 0.058897890132481125, "aux_brier/mean_r": 0.9092752259100776, "aux_brier/n_active_tok": 154.875, "aux_brier/n_groups": 10.15625, "aux_brier/n_step_records": 38.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6728434100683025, "calib/avg_num_step_conf": 4.96484375, "calib/ece": 0.20888888888888885, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07464963318998241, "calib/mean_conf": 0.33325396825396825, "calib/mu_c": 0.36820895522388064, "calib/mu_w": 0.29355932203389823, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005198412698412698, "calib/std_conf": 0.13583562291426696, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2827.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 345.3984375, "completions/mean_terminated_length": 348.11810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.10026666666666667, "grad_norm": 0.024323515594005585, "learning_rate": 2.944444444444445e-06, "loss": 0.0563, "num_tokens": 19373220.0, "reward": 1.1953550577163696, "reward_std": 0.2658788561820984, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7189203500747681, "rewards/format_reward_step_strict": 0.984375, "step": 94 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.361819251199936e-06, "aux_brier/mean_group_std": 0.06770083618106394, "aux_brier/mean_r": 0.9133217776937195, "aux_brier/n_active_tok": 156.625, "aux_brier/n_groups": 8.96875, "aux_brier/n_step_records": 39.15625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5699152542372882, "calib/avg_num_step_conf": 4.89453125, "calib/ece": 0.21519685039370082, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032866400797607165, "calib/mean_conf": 0.34149606299212604, "calib/mu_c": 0.356764705882353, "calib/mu_w": 0.3238983050847458, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01062992125984252, "calib/std_conf": 0.15089016049988507, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2245.0, "completions/max_terminated_length": 2245.0, "completions/mean_length": 355.59375, "completions/mean_terminated_length": 356.9882507324219, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.10133333333333333, "grad_norm": 0.09926465898752213, "learning_rate": 2.916666666666667e-06, "loss": 0.0061, "num_tokens": 19570380.0, "reward": 1.202769160270691, "reward_std": 0.34373557567596436, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7017015814781189, "rewards/format_reward_step_strict": 0.9921875, "step": 95 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.2177741061103475e-06, "aux_brier/mean_group_std": 0.06300016902309992, "aux_brier/mean_r": 0.9226664448038038, "aux_brier/n_active_tok": 147.25, "aux_brier/n_groups": 8.15625, "aux_brier/n_step_records": 36.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6432211042311662, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.2813779527559055, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0547987616099071, "calib/mean_conf": 0.32720472440944887, "calib/mu_c": 0.34921052631578947, "calib/mu_w": 0.2944117647058824, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005078740157480313, "calib/std_conf": 0.12096585309695967, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 333.421875, "completions/mean_terminated_length": 333.421875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1024, "grad_norm": 0.14316502213478088, "learning_rate": 2.888888888888889e-06, "loss": 0.0104, "num_tokens": 19761552.0, "reward": 1.2629387378692627, "reward_std": 0.2341615855693817, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6923800706863403, "rewards/format_reward_step_strict": 0.9921875, "step": 96 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.8734332185577074e-07, "aux_brier/mean_group_std": 0.05663403609999906, "aux_brier/mean_r": 0.9211536845790516, "aux_brier/n_active_tok": 156.0, "aux_brier/n_groups": 9.65625, "aux_brier/n_step_records": 39.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44559151785714285, "calib/avg_num_step_conf": 4.875, "calib/ece": 0.21110236220472436, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": -0.03108878968253964, "calib/mean_conf": 0.3459842519685039, "calib/mu_c": 0.33031746031746034, "calib/mu_w": 0.36140625, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03051181102362205, "calib/std_conf": 0.15375233097276464, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1935.0, "completions/max_terminated_length": 1935.0, "completions/mean_length": 340.828125, "completions/mean_terminated_length": 340.828125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.10346666666666667, "grad_norm": 0.04999121278524399, "learning_rate": 2.861111111111111e-06, "loss": 0.0376, "num_tokens": 19953876.0, "reward": 1.160067081451416, "reward_std": 0.30073976516723633, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6793312430381775, "rewards/format_reward_step_strict": 0.98828125, "step": 97 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1376712472088713e-07, "aux_brier/mean_group_std": 0.0805589025357599, "aux_brier/mean_r": 0.8964236862957904, "aux_brier/n_active_tok": 145.375, "aux_brier/n_groups": 8.65625, "aux_brier/n_step_records": 36.34375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5358522971232288, "calib/avg_num_step_conf": 4.60546875, "calib/ece": 0.1894140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.0220088327301724, "calib/mean_conf": 0.37371093750000006, "calib/mu_c": 0.3839416058394161, "calib/mu_w": 0.3619327731092437, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013984374999999997, "calib/std_conf": 0.15284763922897565, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1072.0, "completions/max_terminated_length": 1072.0, "completions/mean_length": 333.53515625, "completions/mean_terminated_length": 334.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.10453333333333334, "grad_norm": 0.13771434128284454, "learning_rate": 2.8333333333333335e-06, "loss": -0.0523, "num_tokens": 20145445.0, "reward": 1.211392879486084, "reward_std": 0.3488040566444397, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7127590179443359, "rewards/format_reward_step_strict": 0.99609375, "step": 98 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.0447834646331042e-07, "aux_brier/mean_group_std": 0.06869045806504485, "aux_brier/mean_r": 0.9140179784990052, "aux_brier/n_active_tok": 144.75, "aux_brier/n_groups": 9.53125, "aux_brier/n_step_records": 36.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5370285539777064, "calib/avg_num_step_conf": 4.53125, "calib/ece": 0.1258964143426295, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.02845396243701337, "calib/mean_conf": 0.3560159362549801, "calib/mu_c": 0.3760810810810812, "calib/mu_w": 0.3476271186440678, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09354581673306776, "calib/std_conf": 0.17327885473716584, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2278.0, "completions/max_terminated_length": 2278.0, "completions/mean_length": 359.08984375, "completions/mean_terminated_length": 363.34783935546875, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1056, "grad_norm": 0.08540429919958115, "learning_rate": 2.805555555555556e-06, "loss": 0.0184, "num_tokens": 20343172.0, "reward": 0.9680761098861694, "reward_std": 0.2911674380302429, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7551171779632568, "rewards/format_reward_step_strict": 0.98046875, "step": 99 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0144564665665046e-06, "aux_brier/mean_group_std": 0.07165123380718379, "aux_brier/mean_r": 0.9095255815183254, "aux_brier/n_active_tok": 141.0, "aux_brier/n_groups": 8.21875, "aux_brier/n_step_records": 35.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6369385647909137, "calib/avg_num_step_conf": 4.4140625, "calib/ece": 0.09380237154150199, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.07481917914300473, "calib/mean_conf": 0.37007114624505927, "calib/mu_c": 0.41413461538461543, "calib/mu_w": 0.3393154362416107, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.026403162055336, "calib/std_conf": 0.1671301913690808, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2585.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 354.5, "completions/mean_terminated_length": 354.5, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.10666666666666667, "grad_norm": 0.013658665120601654, "learning_rate": 2.7777777777777783e-06, "loss": 0.1055, "num_tokens": 20541332.0, "reward": 1.0836881399154663, "reward_std": 0.33303046226501465, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7488147020339966, "rewards/format_reward_step_strict": 0.97265625, "step": 100 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.86234873475766e-08, "aux_brier/mean_group_std": 0.08143233513363299, "aux_brier/mean_r": 0.89499492494281, "aux_brier/n_active_tok": 157.875, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 39.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5831081081081081, "calib/avg_num_step_conf": 4.94921875, "calib/ece": 0.09276284584980238, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.07781389961389962, "calib/mean_conf": 0.3755375494071146, "calib/mu_c": 0.4210571428571429, "calib/mu_w": 0.3432432432432433, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.026640316205533587, "calib/std_conf": 0.18866415036651846, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2316.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 367.96875, "completions/mean_terminated_length": 369.4117736816406, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.10773333333333333, "grad_norm": 0.07658431679010391, "learning_rate": 2.7500000000000004e-06, "loss": 0.0496, "num_tokens": 20742524.0, "reward": 1.0876333713531494, "reward_std": 0.27290263772010803, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.748970627784729, "rewards/format_reward_step_strict": 0.98046875, "step": 101 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.261652958940319e-07, "aux_brier/mean_group_std": 0.07505972520616663, "aux_brier/mean_r": 0.9071425766198262, "aux_brier/n_active_tok": 139.75, "aux_brier/n_groups": 8.125, "aux_brier/n_step_records": 34.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5938762626262626, "calib/avg_num_step_conf": 4.3671875, "calib/ece": 0.23661417322834646, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.03554671717171709, "calib/mean_conf": 0.35842519685039365, "calib/mu_c": 0.3738194444444444, "calib/mu_w": 0.33827272727272734, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014055118110236225, "calib/std_conf": 0.14980262708528902, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2945.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 306.578125, "completions/mean_terminated_length": 306.578125, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.1088, "grad_norm": 0.3046693503856659, "learning_rate": 2.7222222222222224e-06, "loss": 0.0201, "num_tokens": 20927704.0, "reward": 1.2337197065353394, "reward_std": 0.23760932683944702, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7005038857460022, "rewards/format_reward_step_strict": 0.9921875, "step": 102 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.840684202951252e-08, "aux_brier/mean_group_std": 0.08440399074519347, "aux_brier/mean_r": 0.8849029438175865, "aux_brier/n_active_tok": 156.875, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 39.21875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5929629629629629, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.16698039215686278, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.055481481481481465, "calib/mean_conf": 0.40737254901960784, "calib/mu_c": 0.43348148148148147, "calib/mu_w": 0.378, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02247058823529411, "calib/std_conf": 0.18789170439318764, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2430.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 381.87890625, "completions/mean_terminated_length": 381.87890625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.10986666666666667, "grad_norm": 0.12313272058963776, "learning_rate": 2.6944444444444444e-06, "loss": 0.0343, "num_tokens": 21130017.0, "reward": 1.204804539680481, "reward_std": 0.3005444407463074, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7254679799079895, "rewards/format_reward_step_strict": 0.9921875, "step": 103 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.4673656872097336e-07, "aux_brier/mean_group_std": 0.06418843498650209, "aux_brier/mean_r": 0.907262416261346, "aux_brier/n_active_tok": 150.875, "aux_brier/n_groups": 8.5, "aux_brier/n_step_records": 37.71875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5762580645161292, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.12490196078431375, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.035716129032258126, "calib/mean_conf": 0.39149019607843144, "calib/mu_c": 0.41320000000000007, "calib/mu_w": 0.37748387096774194, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.062117647058823534, "calib/std_conf": 0.16871641975824397, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2426.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 335.21484375, "completions/mean_terminated_length": 335.21484375, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.11093333333333333, "grad_norm": 0.018597371876239777, "learning_rate": 2.666666666666667e-06, "loss": 0.0392, "num_tokens": 21322512.0, "reward": 1.0754871368408203, "reward_std": 0.28687724471092224, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.7472612857818604, "rewards/format_reward_step_strict": 0.99609375, "step": 104 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.404413666530104e-07, "aux_brier/mean_group_std": 0.07330830036124375, "aux_brier/mean_r": 0.8990910735402349, "aux_brier/n_active_tok": 156.125, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 39.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5260475297060663, "calib/avg_num_step_conf": 4.8828125, "calib/ece": 0.18699604743083, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.00814196372732956, "calib/mean_conf": 0.37988142292490124, "calib/mu_c": 0.38406504065040653, "calib/mu_w": 0.37592307692307697, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04035573122529644, "calib/std_conf": 0.20288527924017216, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2457.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 367.94140625, "completions/mean_terminated_length": 367.94140625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.112, "grad_norm": 0.1323513239622116, "learning_rate": 2.6388888888888893e-06, "loss": 0.0878, "num_tokens": 21522465.0, "reward": 1.1499565839767456, "reward_std": 0.3213230073451996, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6935761570930481, "rewards/format_reward_step_strict": 0.984375, "step": 105 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.6703656830505693e-07, "aux_brier/mean_group_std": 0.0817213059123052, "aux_brier/mean_r": 0.8851597949594987, "aux_brier/n_active_tok": 142.25, "aux_brier/n_groups": 8.65625, "aux_brier/n_step_records": 35.5625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6334838709677418, "calib/avg_num_step_conf": 4.4453125, "calib/ece": 0.05662745098039212, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.08661290322580645, "calib/mean_conf": 0.40235294117647064, "calib/mu_c": 0.455, "calib/mu_w": 0.36838709677419357, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.033411764705882335, "calib/std_conf": 0.17425683361055583, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2748.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 340.65625, "completions/mean_terminated_length": 340.65625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.11306666666666666, "grad_norm": 0.1326768845319748, "learning_rate": 2.6111111111111113e-06, "loss": 0.0094, "num_tokens": 21714257.0, "reward": 1.075605869293213, "reward_std": 0.25695741176605225, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.7633609771728516, "rewards/format_reward_step_strict": 0.98828125, "step": 106 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4899348892150055e-07, "aux_brier/mean_group_std": 0.0726369190381352, "aux_brier/mean_r": 0.8983872991704096, "aux_brier/n_active_tok": 153.125, "aux_brier/n_groups": 9.34375, "aux_brier/n_step_records": 38.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5338606030647552, "calib/avg_num_step_conf": 4.78515625, "calib/ece": 0.17854901960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.025262605042016828, "calib/mean_conf": 0.4025490196078432, "calib/mu_c": 0.41433823529411773, "calib/mu_w": 0.3890756302521009, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.023882352941176473, "calib/std_conf": 0.18639872019731168, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 333.27734375, "completions/mean_terminated_length": 334.5843200683594, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.11413333333333334, "grad_norm": 0.32100990414619446, "learning_rate": 2.5833333333333337e-06, "loss": -0.0008, "num_tokens": 21904192.0, "reward": 1.2065610885620117, "reward_std": 0.3408036530017853, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7090566158294678, "rewards/format_reward_step_strict": 0.99609375, "step": 107 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.1088532825563888e-07, "aux_brier/mean_group_std": 0.10277840367139544, "aux_brier/mean_r": 0.8577811296154776, "aux_brier/n_active_tok": 164.5, "aux_brier/n_groups": 8.71875, "aux_brier/n_step_records": 41.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.606356048166393, "calib/avg_num_step_conf": 5.140625, "calib/ece": 0.19894117647058818, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.07960385878489323, "calib/mean_conf": 0.4830196078431373, "calib/mu_c": 0.5101785714285715, "calib/mu_w": 0.43057471264367825, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011568627450980369, "calib/std_conf": 0.2126192851080217, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2201.0, "completions/max_terminated_length": 2201.0, "completions/mean_length": 358.546875, "completions/mean_terminated_length": 358.546875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.1152, "grad_norm": 0.33517855405807495, "learning_rate": 2.5555555555555557e-06, "loss": 0.0201, "num_tokens": 22099212.0, "reward": 1.3294909000396729, "reward_std": 0.2619668245315552, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7320261001586914, "rewards/format_reward_step_strict": 0.98046875, "step": 108 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.449572831883742e-08, "aux_brier/mean_group_std": 0.09258432177188369, "aux_brier/mean_r": 0.8743384908562138, "aux_brier/n_active_tok": 168.25, "aux_brier/n_groups": 9.96875, "aux_brier/n_step_records": 42.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5752794005588011, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.20621513944223108, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.04780876494023904, "calib/gap": 0.040998221996444095, "calib/mean_conf": 0.4415139442231075, "calib/mu_c": 0.4622580645161291, "calib/mu_w": 0.421259842519685, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07685258964143427, "calib/std_conf": 0.2304510102652012, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2275.0, "completions/max_terminated_length": 2275.0, "completions/mean_length": 384.71875, "completions/mean_terminated_length": 387.7480163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.11626666666666667, "grad_norm": 0.4102321267127991, "learning_rate": 2.5277777777777778e-06, "loss": 0.0307, "num_tokens": 22302300.0, "reward": 1.1478333473205566, "reward_std": 0.2864153981208801, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7007085680961609, "rewards/format_reward_step_strict": 0.9765625, "step": 109 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1042610548184228e-08, "aux_brier/mean_group_std": 0.07470751147615498, "aux_brier/mean_r": 0.8892870579721913, "aux_brier/n_active_tok": 145.375, "aux_brier/n_groups": 8.53125, "aux_brier/n_step_records": 36.34375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.532967032967033, "calib/avg_num_step_conf": 4.5546875, "calib/ece": 0.17137254901960786, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.007657967032967006, "calib/mean_conf": 0.41490196078431374, "calib/mu_c": 0.4191964285714286, "calib/mu_w": 0.4115384615384616, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07352941176470588, "calib/std_conf": 0.19000487660428672, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1996.0, "completions/max_terminated_length": 1996.0, "completions/mean_length": 339.5546875, "completions/mean_terminated_length": 339.5546875, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.11733333333333333, "grad_norm": 0.06716830283403397, "learning_rate": 2.5e-06, "loss": 0.033, "num_tokens": 22494146.0, "reward": 1.1126129627227783, "reward_std": 0.31422674655914307, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.7160773277282715, "rewards/format_reward_step_strict": 0.9921875, "step": 110 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.85634411909286e-07, "aux_brier/mean_group_std": 0.08572202197603937, "aux_brier/mean_r": 0.8715183497103269, "aux_brier/n_active_tok": 147.5, "aux_brier/n_groups": 9.40625, "aux_brier/n_step_records": 36.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5761852394916911, "calib/avg_num_step_conf": 4.65234375, "calib/ece": 0.1604296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0546875, "calib/gap": 0.034108015640273714, "calib/mean_conf": 0.4364453125, "calib/mu_c": 0.45403225806451614, "calib/mu_w": 0.4199242424242424, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05624999999999999, "calib/std_conf": 0.225318049813541, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 346.96875, "completions/mean_terminated_length": 348.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.1184, "grad_norm": 0.2514590919017792, "learning_rate": 2.4722222222222226e-06, "loss": 0.0449, "num_tokens": 22690378.0, "reward": 1.1522479057312012, "reward_std": 0.31342071294784546, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7105538845062256, "rewards/format_reward_step_strict": 0.98046875, "step": 111 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.069849301821751e-08, "aux_brier/mean_group_std": 0.10011424842211442, "aux_brier/mean_r": 0.8573007675960261, "aux_brier/n_active_tok": 164.875, "aux_brier/n_groups": 12.0, "aux_brier/n_step_records": 41.21875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6517857142857142, "calib/avg_num_step_conf": 5.30078125, "calib/ece": 0.14479674796747963, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.032520325203252036, "calib/gap": 0.09957539682539679, "calib/mean_conf": 0.43341463414634146, "calib/mu_c": 0.48441666666666666, "calib/mu_w": 0.3848412698412699, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04520325203252032, "calib/std_conf": 0.23165831745483337, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2793.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 428.47265625, "completions/mean_terminated_length": 435.2738342285156, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.11946666666666667, "grad_norm": 0.05973617359995842, "learning_rate": 2.4444444444444447e-06, "loss": 0.0533, "num_tokens": 22907987.0, "reward": 1.1252638101577759, "reward_std": 0.3145098388195038, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.711992621421814, "rewards/format_reward_step_strict": 0.95703125, "step": 112 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1709850034069547e-08, "aux_brier/mean_group_std": 0.08739688755907164, "aux_brier/mean_r": 0.8745251383117962, "aux_brier/n_active_tok": 149.625, "aux_brier/n_groups": 9.09375, "aux_brier/n_step_records": 37.40625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6068394484117212, "calib/avg_num_step_conf": 4.70703125, "calib/ece": 0.1307450980392157, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": 0.07035520807682838, "calib/mean_conf": 0.44235294117647056, "calib/mu_c": 0.47656488549618325, "calib/mu_w": 0.4062096774193549, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02968627450980392, "calib/std_conf": 0.20206180606869187, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 325.19140625, "completions/mean_terminated_length": 326.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.12053333333333334, "grad_norm": 0.17817603051662445, "learning_rate": 2.4166666666666667e-06, "loss": 0.0473, "num_tokens": 23096436.0, "reward": 1.1877672672271729, "reward_std": 0.29871809482574463, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7354441285133362, "rewards/format_reward_step_strict": 0.984375, "step": 113 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7384530445752056e-07, "aux_brier/mean_group_std": 0.08547388446520225, "aux_brier/mean_r": 0.8827719815450317, "aux_brier/n_active_tok": 170.75, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 42.6875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5920479302832244, "calib/avg_num_step_conf": 5.34765625, "calib/ece": 0.2181176470588235, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.05983660130718954, "calib/mean_conf": 0.4524705882352941, "calib/mu_c": 0.47640522875816993, "calib/mu_w": 0.4165686274509804, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03529411764705881, "calib/std_conf": 0.22202228760593298, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 331.80859375, "completions/mean_terminated_length": 333.1098327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.1216, "grad_norm": 0.0962509736418724, "learning_rate": 2.388888888888889e-06, "loss": -0.0122, "num_tokens": 23286403.0, "reward": 1.2724648714065552, "reward_std": 0.3058438301086426, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7148593664169312, "rewards/format_reward_step_strict": 0.9921875, "step": 114 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4755961041945653e-07, "aux_brier/mean_group_std": 0.08000380095624444, "aux_brier/mean_r": 0.8847967506190682, "aux_brier/n_active_tok": 147.5, "aux_brier/n_groups": 8.59375, "aux_brier/n_step_records": 36.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5706005029749127, "calib/avg_num_step_conf": 4.6171875, "calib/ece": 0.1517578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.05589094031773301, "calib/mean_conf": 0.4471484375, "calib/mu_c": 0.47705882352941187, "calib/mu_w": 0.42116788321167886, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06703125000000001, "calib/std_conf": 0.2144799156478494, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 908.0, "completions/max_terminated_length": 908.0, "completions/mean_length": 320.32421875, "completions/mean_terminated_length": 321.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.12266666666666666, "grad_norm": 0.07951895892620087, "learning_rate": 2.361111111111111e-06, "loss": 0.0392, "num_tokens": 23473670.0, "reward": 1.1480258703231812, "reward_std": 0.27451997995376587, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7327284812927246, "rewards/format_reward_step_strict": 1.0, "step": 115 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.062078467510389e-08, "aux_brier/mean_group_std": 0.08707501669837008, "aux_brier/mean_r": 0.8670354526341408, "aux_brier/n_active_tok": 155.5, "aux_brier/n_groups": 10.0, "aux_brier/n_step_records": 38.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6268748086929905, "calib/avg_num_step_conf": 4.92578125, "calib/ece": 0.1732421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07421875, "calib/gap": 0.08672972145699409, "calib/mean_conf": 0.4544140625, "calib/mu_c": 0.49540740740740735, "calib/mu_w": 0.40867768595041326, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05015625, "calib/std_conf": 0.23883598017728838, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1084.0, "completions/max_terminated_length": 1084.0, "completions/mean_length": 377.35546875, "completions/mean_terminated_length": 378.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.12373333333333333, "grad_norm": 0.06519480794668198, "learning_rate": 2.3333333333333336e-06, "loss": 0.0076, "num_tokens": 23674793.0, "reward": 1.201657772064209, "reward_std": 0.2613446116447449, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7285062074661255, "rewards/format_reward_step_strict": 0.984375, "step": 116 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.285587217791928e-07, "aux_brier/mean_group_std": 0.08887216248945048, "aux_brier/mean_r": 0.8877628202023963, "aux_brier/n_active_tok": 159.125, "aux_brier/n_groups": 8.875, "aux_brier/n_step_records": 39.78125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5661910056832221, "calib/avg_num_step_conf": 5.08984375, "calib/ece": 0.11613281249999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.02734375, "calib/gap": 0.05093773165307641, "calib/mean_conf": 0.42042968750000004, "calib/mu_c": 0.44868421052631585, "calib/mu_w": 0.39774647887323944, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.045625, "calib/std_conf": 0.2007296527512872, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1033.0, "completions/max_terminated_length": 1033.0, "completions/mean_length": 354.15625, "completions/mean_terminated_length": 355.54510498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.1248, "grad_norm": 0.45683225989341736, "learning_rate": 2.305555555555556e-06, "loss": 0.0039, "num_tokens": 23872057.0, "reward": 1.119857668876648, "reward_std": 0.2601226568222046, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.7372433543205261, "rewards/format_reward_step_strict": 0.98046875, "step": 117 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.1666325783774596e-08, "aux_brier/mean_group_std": 0.10054606616027299, "aux_brier/mean_r": 0.8643378587874978, "aux_brier/n_active_tok": 154.625, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 38.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44894432575091114, "calib/avg_num_step_conf": 4.83984375, "calib/ece": 0.28799647058823524, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.058823529411764705, "calib/gap": -0.03816274349629262, "calib/mean_conf": 0.4071407843137255, "calib/mu_c": 0.39082808219178083, "calib/mu_w": 0.42899082568807345, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06129411764705881, "calib/std_conf": 0.23531119283598614, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2319.0, "completions/max_terminated_length": 2319.0, "completions/mean_length": 339.65234375, "completions/mean_terminated_length": 339.65234375, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.12586666666666665, "grad_norm": 0.01657978817820549, "learning_rate": 2.277777777777778e-06, "loss": 0.0398, "num_tokens": 24063016.0, "reward": 1.2292308807373047, "reward_std": 0.2461319863796234, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6512982845306396, "rewards/format_reward_step_strict": 0.9921875, "step": 118 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.0341530166697623e-08, "aux_brier/mean_group_std": 0.09084952239194923, "aux_brier/mean_r": 0.8888653701471764, "aux_brier/n_active_tok": 142.125, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 35.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.654015748031496, "calib/avg_num_step_conf": 4.55859375, "calib/ece": 0.15884920634920632, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": 0.09944503937007887, "calib/mean_conf": 0.4203571428571429, "calib/mu_c": 0.4696850393700788, "calib/mu_w": 0.37023999999999996, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03761904761904761, "calib/std_conf": 0.20869271723459198, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 385.35546875, "completions/mean_terminated_length": 386.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12693333333333334, "grad_norm": 0.0730527713894844, "learning_rate": 2.25e-06, "loss": 0.0193, "num_tokens": 24266731.0, "reward": 1.1726526021957397, "reward_std": 0.33147647976875305, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7374855279922485, "rewards/format_reward_step_strict": 0.984375, "step": 119 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0540737782616105e-08, "aux_brier/mean_group_std": 0.09026363877259834, "aux_brier/mean_r": 0.8913992862721279, "aux_brier/n_active_tok": 153.0, "aux_brier/n_groups": 8.1875, "aux_brier/n_step_records": 38.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6489453558653014, "calib/avg_num_step_conf": 4.78515625, "calib/ece": 0.13423529411764704, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.09716787961021334, "calib/mean_conf": 0.4271764705882353, "calib/mu_c": 0.4732835820895522, "calib/mu_w": 0.37611570247933884, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.017960784313725497, "calib/std_conf": 0.2018091416225866, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 352.83203125, "completions/mean_terminated_length": 352.83203125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.128, "grad_norm": 0.12056002020835876, "learning_rate": 2.222222222222222e-06, "loss": 0.0035, "num_tokens": 24463744.0, "reward": 1.2059788703918457, "reward_std": 0.3073646128177643, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7457902431488037, "rewards/format_reward_step_strict": 0.9921875, "step": 120 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.417528720601638e-08, "aux_brier/mean_group_std": 0.10318590389525976, "aux_brier/mean_r": 0.8496503904299731, "aux_brier/n_active_tok": 149.75, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 37.4375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5958017676767677, "calib/avg_num_step_conf": 4.8203125, "calib/ece": 0.15706349206349204, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": 0.07640909090909098, "calib/mean_conf": 0.4638095238095238, "calib/mu_c": 0.5038333333333334, "calib/mu_w": 0.4274242424242424, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07234126984126983, "calib/std_conf": 0.24640384048896183, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2840.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 390.1953125, "completions/mean_terminated_length": 393.2677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.12906666666666666, "grad_norm": 0.03554612770676613, "learning_rate": 2.1944444444444445e-06, "loss": 0.0297, "num_tokens": 24668690.0, "reward": 1.1195628643035889, "reward_std": 0.3589603304862976, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7126262187957764, "rewards/format_reward_step_strict": 0.9453125, "step": 121 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1827658319418788e-07, "aux_brier/mean_group_std": 0.0946850603019013, "aux_brier/mean_r": 0.8874073725829168, "aux_brier/n_active_tok": 146.0, "aux_brier/n_groups": 7.5625, "aux_brier/n_step_records": 36.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6489562374245473, "calib/avg_num_step_conf": 4.5625, "calib/ece": 0.12716535433070866, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.12007796780684105, "calib/mean_conf": 0.44409448818897634, "calib/mu_c": 0.49704225352112674, "calib/mu_w": 0.3769642857142857, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0061023622047244085, "calib/std_conf": 0.225241268126553, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1574.0, "completions/max_terminated_length": 1574.0, "completions/mean_length": 322.10546875, "completions/mean_terminated_length": 323.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.13013333333333332, "grad_norm": 0.015693731606006622, "learning_rate": 2.166666666666667e-06, "loss": -0.0199, "num_tokens": 24858493.0, "reward": 1.2343302965164185, "reward_std": 0.26484552025794983, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7420085668563843, "rewards/format_reward_step_strict": 0.98828125, "step": 122 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.8801498891107826e-07, "aux_brier/mean_group_std": 0.10608873661524075, "aux_brier/mean_r": 0.8413336388252236, "aux_brier/n_active_tok": 159.75, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 39.9375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5281862745098039, "calib/avg_num_step_conf": 5.46875, "calib/ece": 0.19280000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.06, "calib/gap": 0.017316821465428178, "calib/mean_conf": 0.49672000000000005, "calib/mu_c": 0.5061403508771929, "calib/mu_w": 0.4888235294117647, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11675999999999997, "calib/std_conf": 0.23642259113714156, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2609.0, "completions/max_terminated_length": 2609.0, "completions/mean_length": 412.78125, "completions/mean_terminated_length": 416.031494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.1312, "grad_norm": 0.031050747260451317, "learning_rate": 2.138888888888889e-06, "loss": 0.034, "num_tokens": 25069453.0, "reward": 1.1010408401489258, "reward_std": 0.3160806894302368, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6854137182235718, "rewards/format_reward_step_strict": 0.96875, "step": 123 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.988070060660041e-07, "aux_brier/mean_group_std": 0.10306439348137618, "aux_brier/mean_r": 0.8692028999523264, "aux_brier/n_active_tok": 151.5, "aux_brier/n_groups": 8.375, "aux_brier/n_step_records": 37.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.470428466796875, "calib/avg_num_step_conf": 4.77734375, "calib/ece": 0.25312500000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.046875, "calib/gap": -0.01734374999999999, "calib/mean_conf": 0.4503125, "calib/mu_c": 0.441640625, "calib/mu_w": 0.458984375, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10171875000000002, "calib/std_conf": 0.2564890272384961, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 361.20703125, "completions/mean_terminated_length": 364.0511779785156, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.13226666666666667, "grad_norm": 0.2864026725292206, "learning_rate": 2.1111111111111114e-06, "loss": -0.0122, "num_tokens": 25268738.0, "reward": 1.1633951663970947, "reward_std": 0.2395247519016266, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6692054271697998, "rewards/format_reward_step_strict": 0.9921875, "step": 124 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.646160974731584e-07, "aux_brier/mean_group_std": 0.10724135052195052, "aux_brier/mean_r": 0.8474057649160144, "aux_brier/n_active_tok": 150.25, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 37.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.664725770097671, "calib/avg_num_step_conf": 4.7265625, "calib/ece": 0.1176679841897233, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05533596837944664, "calib/gap": 0.14447658402203867, "calib/mean_conf": 0.45197628458498024, "calib/mu_c": 0.5273553719008265, "calib/mu_w": 0.3828787878787878, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04569169960474307, "calib/std_conf": 0.2474343595449688, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2709.0, "completions/max_terminated_length": 2709.0, "completions/mean_length": 404.203125, "completions/mean_terminated_length": 405.78826904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.13333333333333333, "grad_norm": 0.29562297463417053, "learning_rate": 2.0833333333333334e-06, "loss": -0.001, "num_tokens": 25477022.0, "reward": 1.1527801752090454, "reward_std": 0.314397394657135, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7517457008361816, "rewards/format_reward_step_strict": 0.984375, "step": 125 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.0224669508572823e-07, "aux_brier/mean_group_std": 0.12232005259746306, "aux_brier/mean_r": 0.8612689452947031, "aux_brier/n_active_tok": 162.25, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 40.5625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7232528041415014, "calib/avg_num_step_conf": 5.0703125, "calib/ece": 0.13066666666666665, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.17442314803401943, "calib/mean_conf": 0.4579607843137255, "calib/mu_c": 0.5489344262295082, "calib/mu_w": 0.37451127819548874, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.055098039215686255, "calib/std_conf": 0.24571416207154625, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2587.0, "completions/max_terminated_length": 2587.0, "completions/mean_length": 395.515625, "completions/mean_terminated_length": 395.515625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.1344, "grad_norm": 0.6604772806167603, "learning_rate": 2.0555555555555555e-06, "loss": 0.0298, "num_tokens": 25683738.0, "reward": 1.1651129722595215, "reward_std": 0.2828695774078369, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.7698269486427307, "rewards/format_reward_step_strict": 0.9921875, "step": 126 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7240986049937845e-07, "aux_brier/mean_group_std": 0.101689242567387, "aux_brier/mean_r": 0.873445960943868, "aux_brier/n_active_tok": 150.5, "aux_brier/n_groups": 9.21875, "aux_brier/n_step_records": 37.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6691041468819247, "calib/avg_num_step_conf": 4.875, "calib/ece": 0.14007936507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": 0.11456410256410249, "calib/mean_conf": 0.3956349206349207, "calib/mu_c": 0.457008547008547, "calib/mu_w": 0.3424444444444445, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03571428571428572, "calib/std_conf": 0.22939878645056888, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 374.80078125, "completions/mean_terminated_length": 377.751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.13546666666666668, "grad_norm": 0.1178617998957634, "learning_rate": 2.027777777777778e-06, "loss": 0.0191, "num_tokens": 25883359.0, "reward": 1.1340175867080688, "reward_std": 0.21370188891887665, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7391953468322754, "rewards/format_reward_step_strict": 0.984375, "step": 127 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.3119981368567863e-08, "aux_brier/mean_group_std": 0.0870432280072729, "aux_brier/mean_r": 0.8977486576236519, "aux_brier/n_active_tok": 134.75, "aux_brier/n_groups": 7.59375, "aux_brier/n_step_records": 33.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5940603514132927, "calib/avg_num_step_conf": 4.2109375, "calib/ece": 0.1352191235059761, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.07287687802393683, "calib/mean_conf": 0.407808764940239, "calib/mu_c": 0.4461344537815126, "calib/mu_w": 0.3732575757575758, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03446215139442231, "calib/std_conf": 0.2191098872986348, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2021.0, "completions/max_terminated_length": 2021.0, "completions/mean_length": 360.046875, "completions/mean_terminated_length": 362.88189697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.13653333333333334, "grad_norm": 0.1828767955303192, "learning_rate": 2.0000000000000003e-06, "loss": 0.005, "num_tokens": 26082195.0, "reward": 1.1307801008224487, "reward_std": 0.2932726740837097, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.718433141708374, "rewards/format_reward_step_strict": 0.97265625, "step": 128 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.567461550641916e-08, "aux_brier/mean_group_std": 0.11122924313788123, "aux_brier/mean_r": 0.8456283750074335, "aux_brier/n_active_tok": 149.0, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 37.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6755646152850578, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.1816141732283465, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.15361224357729886, "calib/mean_conf": 0.430748031496063, "calib/mu_c": 0.4918300653594771, "calib/mu_w": 0.33821782178217824, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005, "calib/std_conf": 0.23850549997798173, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2632.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 365.29296875, "completions/mean_terminated_length": 365.29296875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.1376, "grad_norm": 0.05314285680651665, "learning_rate": 1.9722222222222224e-06, "loss": 0.0391, "num_tokens": 26278094.0, "reward": 1.279221773147583, "reward_std": 0.2662426829338074, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.741887092590332, "rewards/format_reward_step_strict": 0.9921875, "step": 129 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.070673162963722e-08, "aux_brier/mean_group_std": 0.11395797789631851, "aux_brier/mean_r": 0.8604183942753519, "aux_brier/n_active_tok": 148.75, "aux_brier/n_groups": 9.21875, "aux_brier/n_step_records": 37.1875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5640786363070797, "calib/avg_num_step_conf": 4.7109375, "calib/ece": 0.21717647058823536, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.04847517730496459, "calib/mean_conf": 0.4418039215686274, "calib/mu_c": 0.4634751773049646, "calib/mu_w": 0.41500000000000004, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05301960784313724, "calib/std_conf": 0.23246648669346232, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 343.33984375, "completions/mean_terminated_length": 344.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.13866666666666666, "grad_norm": 0.4812711477279663, "learning_rate": 1.944444444444445e-06, "loss": -0.0222, "num_tokens": 26471277.0, "reward": 1.2247710227966309, "reward_std": 0.2347400188446045, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7037718296051025, "rewards/format_reward_step_strict": 0.98828125, "step": 130 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2584099540191573e-07, "aux_brier/mean_group_std": 0.09516103547688488, "aux_brier/mean_r": 0.8974033465950478, "aux_brier/n_active_tok": 135.625, "aux_brier/n_groups": 6.96875, "aux_brier/n_step_records": 33.90625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7035392364793214, "calib/avg_num_step_conf": 4.2578125, "calib/ece": 0.07484375000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03125, "calib/gap": 0.15309650053022278, "calib/mean_conf": 0.41203124999999996, "calib/mu_c": 0.510108695652174, "calib/mu_w": 0.3570121951219512, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06375000000000001, "calib/std_conf": 0.22691497366510988, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 332.16796875, "completions/mean_terminated_length": 333.4706115722656, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.13973333333333332, "grad_norm": 0.23848077654838562, "learning_rate": 1.916666666666667e-06, "loss": 0.0177, "num_tokens": 26662520.0, "reward": 1.05392324924469, "reward_std": 0.17232808470726013, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.7860054969787598, "rewards/format_reward_step_strict": 0.99609375, "step": 131 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2778377309063949e-07, "aux_brier/mean_group_std": 0.12836679395531925, "aux_brier/mean_r": 0.8437291650893733, "aux_brier/n_active_tok": 151.5, "aux_brier/n_groups": 8.84375, "aux_brier/n_step_records": 37.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5946997776130467, "calib/avg_num_step_conf": 4.7578125, "calib/ece": 0.23011718750000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0546875, "calib/gap": 0.04791821102050897, "calib/mean_conf": 0.4703515625, "calib/mu_c": 0.4916901408450704, "calib/mu_w": 0.44377192982456143, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07289062499999999, "calib/std_conf": 0.23557831400366333, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1012.0, "completions/max_terminated_length": 1012.0, "completions/mean_length": 354.21484375, "completions/mean_terminated_length": 355.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.1408, "grad_norm": 0.09761397540569305, "learning_rate": 1.888888888888889e-06, "loss": -0.0031, "num_tokens": 26858791.0, "reward": 1.2303372621536255, "reward_std": 0.29718682169914246, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7104113101959229, "rewards/format_reward_step_strict": 0.99609375, "step": 132 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.257254382691091e-08, "aux_brier/mean_group_std": 0.09797772669652348, "aux_brier/mean_r": 0.8487588479886035, "aux_brier/n_active_tok": 162.375, "aux_brier/n_groups": 10.28125, "aux_brier/n_step_records": 40.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5794680293501049, "calib/avg_num_step_conf": 5.07421875, "calib/ece": 0.19388235294117648, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.07302869496855341, "calib/mean_conf": 0.4895686274509804, "calib/mu_c": 0.5351041666666666, "calib/mu_w": 0.4620754716981132, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15349019607843137, "calib/std_conf": 0.245675160539195, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2084.0, "completions/max_terminated_length": 2084.0, "completions/mean_length": 414.26171875, "completions/mean_terminated_length": 414.26171875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.14186666666666667, "grad_norm": 0.12505367398262024, "learning_rate": 1.8611111111111113e-06, "loss": 0.0394, "num_tokens": 27071186.0, "reward": 1.0517127513885498, "reward_std": 0.31258824467658997, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.7224761843681335, "rewards/format_reward_step_strict": 0.9921875, "step": 133 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.166705913187906e-07, "aux_brier/mean_group_std": 0.11841662931201469, "aux_brier/mean_r": 0.8527166637312599, "aux_brier/n_active_tok": 146.375, "aux_brier/n_groups": 8.46875, "aux_brier/n_step_records": 36.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6394675212621718, "calib/avg_num_step_conf": 4.62890625, "calib/ece": 0.14580392156862743, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.12857512634044116, "calib/mean_conf": 0.4524705882352941, "calib/mu_c": 0.513984962406015, "calib/mu_w": 0.38540983606557383, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03835294117647056, "calib/std_conf": 0.2646162113911892, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2009.0, "completions/max_terminated_length": 2009.0, "completions/mean_length": 420.75390625, "completions/mean_terminated_length": 420.75390625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.14293333333333333, "grad_norm": 0.0434727743268013, "learning_rate": 1.8333333333333333e-06, "loss": 0.0074, "num_tokens": 27287851.0, "reward": 1.197908639907837, "reward_std": 0.3439040780067444, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7369468808174133, "rewards/format_reward_step_strict": 0.98828125, "step": 134 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7586449063689003e-07, "aux_brier/mean_group_std": 0.09495125476588134, "aux_brier/mean_r": 0.8686151587228502, "aux_brier/n_active_tok": 148.25, "aux_brier/n_groups": 10.03125, "aux_brier/n_step_records": 37.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5088279489105936, "calib/avg_num_step_conf": 4.671875, "calib/ece": 0.28446640316205535, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04743083003952569, "calib/gap": -0.009607438016528858, "calib/mean_conf": 0.40573122529644273, "calib/mu_c": 0.4011363636363637, "calib/mu_w": 0.41074380165289254, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0842292490118577, "calib/std_conf": 0.24196486305625992, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2670.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 404.4140625, "completions/mean_terminated_length": 406.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.144, "grad_norm": 0.018072057515382767, "learning_rate": 1.8055555555555557e-06, "loss": 0.0222, "num_tokens": 27497261.0, "reward": 1.1693822145462036, "reward_std": 0.2787361145019531, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6619038581848145, "rewards/format_reward_step_strict": 0.9765625, "step": 135 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.6963278571278124e-07, "aux_brier/mean_group_std": 0.10035939349191147, "aux_brier/mean_r": 0.8840327020246582, "aux_brier/n_active_tok": 152.625, "aux_brier/n_groups": 9.4375, "aux_brier/n_step_records": 38.15625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5743686868686868, "calib/avg_num_step_conf": 4.83984375, "calib/ece": 0.14574409448818898, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03937007874015748, "calib/gap": 0.05753560606060604, "calib/mean_conf": 0.4222086614173229, "calib/mu_c": 0.4548272727272727, "calib/mu_w": 0.39729166666666665, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.06744094488188977, "calib/std_conf": 0.23177458113523347, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2156.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 401.5703125, "completions/mean_terminated_length": 401.5703125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.14506666666666668, "grad_norm": 0.2320486158132553, "learning_rate": 1.777777777777778e-06, "loss": 0.0622, "num_tokens": 27708551.0, "reward": 1.0957106351852417, "reward_std": 0.26839518547058105, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.718779981136322, "rewards/format_reward_step_strict": 0.97265625, "step": 136 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1207802191881555e-07, "aux_brier/mean_group_std": 0.11627500770620562, "aux_brier/mean_r": 0.8622695410000498, "aux_brier/n_active_tok": 166.375, "aux_brier/n_groups": 10.375, "aux_brier/n_step_records": 41.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6421440972222222, "calib/avg_num_step_conf": 5.21484375, "calib/ece": 0.13389763779527555, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.11618551587301579, "calib/mean_conf": 0.47232283464566927, "calib/mu_c": 0.5308730158730158, "calib/mu_w": 0.41468750000000004, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05507874015748028, "calib/std_conf": 0.2480072510334993, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 398.05078125, "completions/mean_terminated_length": 398.05078125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.14613333333333334, "grad_norm": 0.10685214400291443, "learning_rate": 1.75e-06, "loss": 0.0438, "num_tokens": 27917436.0, "reward": 1.1752855777740479, "reward_std": 0.26773321628570557, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7402050495147705, "rewards/format_reward_step_strict": 0.98828125, "step": 137 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2109068817101054e-08, "aux_brier/mean_group_std": 0.10397222747225303, "aux_brier/mean_r": 0.8673317062790896, "aux_brier/n_active_tok": 157.75, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 39.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6007628734901462, "calib/avg_num_step_conf": 4.9296875, "calib/ece": 0.18909090909090898, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07509881422924901, "calib/gap": 0.09020979020979009, "calib/mean_conf": 0.4509881422924902, "calib/mu_c": 0.49020979020979016, "calib/mu_w": 0.4000000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03743083003952566, "calib/std_conf": 0.25137922287529424, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2948.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 389.6171875, "completions/mean_terminated_length": 391.1451110839844, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1472, "grad_norm": 0.023131702095270157, "learning_rate": 1.7222222222222224e-06, "loss": 0.0244, "num_tokens": 28121514.0, "reward": 1.2328879833221436, "reward_std": 0.32997649908065796, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7128019332885742, "rewards/format_reward_step_strict": 0.984375, "step": 138 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.1711855202610764e-08, "aux_brier/mean_group_std": 0.10339953483657323, "aux_brier/mean_r": 0.8589236167423117, "aux_brier/n_active_tok": 152.5, "aux_brier/n_groups": 9.09375, "aux_brier/n_step_records": 38.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6624748490945673, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.16464566929133856, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.13121353118712276, "calib/mean_conf": 0.4609448818897638, "calib/mu_c": 0.5188028169014085, "calib/mu_w": 0.38758928571428575, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03326771653543306, "calib/std_conf": 0.2457014162319214, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 394.1328125, "completions/mean_terminated_length": 394.1328125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.14826666666666666, "grad_norm": 0.21900862455368042, "learning_rate": 1.6944444444444446e-06, "loss": 0.0326, "num_tokens": 28325508.0, "reward": 1.2324597835540771, "reward_std": 0.3042023479938507, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7423390746116638, "rewards/format_reward_step_strict": 0.984375, "step": 139 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.5296178423884754e-07, "aux_brier/mean_group_std": 0.10491539743330237, "aux_brier/mean_r": 0.8573796328534654, "aux_brier/n_active_tok": 146.5, "aux_brier/n_groups": 8.0625, "aux_brier/n_step_records": 36.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6832255401128388, "calib/avg_num_step_conf": 4.59765625, "calib/ece": 0.20505882352941168, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.15438282647584972, "calib/mean_conf": 0.46254901960784317, "calib/mu_c": 0.5146153846153846, "calib/mu_w": 0.36023255813953486, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002431372549019615, "calib/std_conf": 0.23492045493336175, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1556.0, "completions/max_terminated_length": 1556.0, "completions/mean_length": 397.37890625, "completions/mean_terminated_length": 398.9372863769531, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.14933333333333335, "grad_norm": 0.19613376259803772, "learning_rate": 1.6666666666666667e-06, "loss": 0.0217, "num_tokens": 28532253.0, "reward": 1.336437463760376, "reward_std": 0.2852696180343628, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.7441874742507935, "rewards/format_reward_step_strict": 0.98046875, "step": 140 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.3265246309734913e-07, "aux_brier/mean_group_std": 0.13106887129133274, "aux_brier/mean_r": 0.8237553344871757, "aux_brier/n_active_tok": 158.75, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 39.6875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7558265410086601, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.15741176470588236, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.11372549019607843, "calib/gap": 0.23163588894549147, "calib/mean_conf": 0.4895686274509804, "calib/mu_c": 0.5840397350993377, "calib/mu_w": 0.35240384615384623, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02741176470588234, "calib/std_conf": 0.2758249839315888, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 403.7109375, "completions/mean_terminated_length": 405.29412841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.1504, "grad_norm": 0.016531849279999733, "learning_rate": 1.638888888888889e-06, "loss": -0.0031, "num_tokens": 28742699.0, "reward": 1.2801527976989746, "reward_std": 0.27934694290161133, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7768609523773193, "rewards/format_reward_step_strict": 0.9921875, "step": 141 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.1151371324432695e-07, "aux_brier/mean_group_std": 0.1267744893044742, "aux_brier/mean_r": 0.8154235344445251, "aux_brier/n_active_tok": 168.625, "aux_brier/n_groups": 10.46875, "aux_brier/n_step_records": 42.15625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6366908707255011, "calib/avg_num_step_conf": 5.30078125, "calib/ece": 0.16783464566929132, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09448818897637795, "calib/gap": 0.13334636628809043, "calib/mean_conf": 0.5339763779527559, "calib/mu_c": 0.5985496183206108, "calib/mu_w": 0.4652032520325204, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09303149606299213, "calib/std_conf": 0.2672540731204657, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 418.09375, "completions/mean_terminated_length": 421.3858337402344, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.15146666666666667, "grad_norm": 0.06602392345666885, "learning_rate": 1.6111111111111113e-06, "loss": 0.021, "num_tokens": 28954891.0, "reward": 1.184819221496582, "reward_std": 0.23616482317447662, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7392769455909729, "rewards/format_reward_step_strict": 0.9765625, "step": 142 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.8225036313435083e-08, "aux_brier/mean_group_std": 0.11590850122108248, "aux_brier/mean_r": 0.8699870057239746, "aux_brier/n_active_tok": 179.625, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 44.90625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6742481203007519, "calib/avg_num_step_conf": 5.6484375, "calib/ece": 0.12565217391304348, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.14580701754385977, "calib/mean_conf": 0.4703162055335969, "calib/mu_c": 0.5394736842105264, "calib/mu_w": 0.39366666666666666, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03513833992094863, "calib/std_conf": 0.24918979150032125, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 431.55859375, "completions/mean_terminated_length": 431.55859375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.15253333333333333, "grad_norm": 0.08794353902339935, "learning_rate": 1.5833333333333333e-06, "loss": 0.0435, "num_tokens": 29172706.0, "reward": 1.193190336227417, "reward_std": 0.2812745273113251, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.749323844909668, "rewards/format_reward_step_strict": 0.97265625, "step": 143 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.11761387206622e-08, "aux_brier/mean_group_std": 0.11240189797013717, "aux_brier/mean_r": 0.8132787126500433, "aux_brier/n_active_tok": 165.25, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 41.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5276225139737424, "calib/avg_num_step_conf": 5.25390625, "calib/ece": 0.2456470588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.10588235294117647, "calib/gap": 0.022529572338489623, "calib/mean_conf": 0.5281568627450981, "calib/mu_c": 0.5368152866242039, "calib/mu_w": 0.5142857142857142, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.07905882352941176, "calib/std_conf": 0.2620689456170347, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1591.0, "completions/max_terminated_length": 1591.0, "completions/mean_length": 406.20703125, "completions/mean_terminated_length": 407.8000183105469, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.1536, "grad_norm": 0.0926431193947792, "learning_rate": 1.5555555555555558e-06, "loss": -0.0263, "num_tokens": 29380823.0, "reward": 1.271442174911499, "reward_std": 0.26009607315063477, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6873311996459961, "rewards/format_reward_step_strict": 0.97265625, "step": 144 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.840106399006672e-08, "aux_brier/mean_group_std": 0.11061379380508077, "aux_brier/mean_r": 0.8101737122108782, "aux_brier/n_active_tok": 171.625, "aux_brier/n_groups": 10.09375, "aux_brier/n_step_records": 42.90625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5978014656895403, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.16102766798418977, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09881422924901186, "calib/gap": 0.09130046635576272, "calib/mean_conf": 0.5734387351778657, "calib/mu_c": 0.6077215189873417, "calib/mu_w": 0.516421052631579, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05498023715415028, "calib/std_conf": 0.26611914699400946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 385.67578125, "completions/mean_terminated_length": 388.71258544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.15466666666666667, "grad_norm": 0.03689923882484436, "learning_rate": 1.527777777777778e-06, "loss": 0.0234, "num_tokens": 29582260.0, "reward": 1.2909455299377441, "reward_std": 0.3134627044200897, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7262820601463318, "rewards/format_reward_step_strict": 0.984375, "step": 145 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.16075968071128e-09, "aux_brier/mean_group_std": 0.12136202682685863, "aux_brier/mean_r": 0.8476437314556111, "aux_brier/n_active_tok": 173.75, "aux_brier/n_groups": 10.1875, "aux_brier/n_step_records": 43.4375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6105483094846145, "calib/avg_num_step_conf": 5.4296875, "calib/ece": 0.13011764705882356, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.0889508674180069, "calib/mean_conf": 0.484156862745098, "calib/mu_c": 0.5361320754716982, "calib/mu_w": 0.44718120805369127, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09929411764705882, "calib/std_conf": 0.23915860456855245, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 435.50390625, "completions/mean_terminated_length": 437.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.15573333333333333, "grad_norm": 0.14213219285011292, "learning_rate": 1.5e-06, "loss": 0.0106, "num_tokens": 29800965.0, "reward": 1.0959968566894531, "reward_std": 0.2882137894630432, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.7355499267578125, "rewards/format_reward_step_strict": 0.99609375, "step": 146 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.5753061239086605e-08, "aux_brier/mean_group_std": 0.13903331618433384, "aux_brier/mean_r": 0.7917967211471233, "aux_brier/n_active_tok": 171.5, "aux_brier/n_groups": 9.3125, "aux_brier/n_step_records": 42.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6507095553453169, "calib/avg_num_step_conf": 5.421875, "calib/ece": 0.21617187499999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.125, "calib/gap": 0.13481046988331769, "calib/mean_conf": 0.582578125, "calib/mu_c": 0.6620952380952382, "calib/mu_w": 0.5272847682119205, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19429687499999998, "calib/std_conf": 0.26664801850282777, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1156.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 408.76953125, "completions/mean_terminated_length": 410.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.1568, "grad_norm": 0.053500305861234665, "learning_rate": 1.4722222222222225e-06, "loss": 0.0177, "num_tokens": 30009290.0, "reward": 1.0866820812225342, "reward_std": 0.25396963953971863, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7217280864715576, "rewards/format_reward_step_strict": 0.9921875, "step": 147 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.18270529911813e-08, "aux_brier/mean_group_std": 0.14510941490496707, "aux_brier/mean_r": 0.7777847091972564, "aux_brier/n_active_tok": 179.875, "aux_brier/n_groups": 10.3125, "aux_brier/n_step_records": 44.96875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7305123884416151, "calib/avg_num_step_conf": 5.625, "calib/ece": 0.09527343749999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.15234375, "calib/gap": 0.1961661361792424, "calib/mean_conf": 0.6387890625, "calib/mu_c": 0.7223129251700681, "calib/mu_w": 0.5261467889908257, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07992187499999998, "calib/std_conf": 0.24587633935653727, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 401.03515625, "completions/mean_terminated_length": 402.6078796386719, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.15786666666666666, "grad_norm": 0.06759005784988403, "learning_rate": 1.4444444444444445e-06, "loss": 0.0061, "num_tokens": 30217067.0, "reward": 1.2679929733276367, "reward_std": 0.25442275404930115, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7829093933105469, "rewards/format_reward_step_strict": 0.99609375, "step": 148 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1345247609428899e-07, "aux_brier/mean_group_std": 0.1439968289027517, "aux_brier/mean_r": 0.7876439387436677, "aux_brier/n_active_tok": 192.375, "aux_brier/n_groups": 11.09375, "aux_brier/n_step_records": 48.09375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6473214285714286, "calib/avg_num_step_conf": 6.05859375, "calib/ece": 0.164744, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.172, "calib/gap": 0.14561645962732928, "calib/mean_conf": 0.5834159999999999, "calib/mu_c": 0.6486521739130435, "calib/mu_w": 0.5030357142857143, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09808000000000003, "calib/std_conf": 0.2818881958223863, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 475.4296875, "completions/mean_terminated_length": 479.1732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.15893333333333334, "grad_norm": 0.034335069358348846, "learning_rate": 1.4166666666666667e-06, "loss": 0.0557, "num_tokens": 30443233.0, "reward": 1.2051457166671753, "reward_std": 0.32590651512145996, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7268328070640564, "rewards/format_reward_step_strict": 0.96875, "step": 149 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.9801118059391563e-08, "aux_brier/mean_group_std": 0.13442839650499902, "aux_brier/mean_r": 0.8012688460856431, "aux_brier/n_active_tok": 166.625, "aux_brier/n_groups": 9.3125, "aux_brier/n_step_records": 41.65625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5937343828085956, "calib/avg_num_step_conf": 5.40234375, "calib/ece": 0.15464566929133858, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07874015748031496, "calib/gap": 0.08082583708145918, "calib/mean_conf": 0.5805511811023621, "calib/mu_c": 0.6174637681159421, "calib/mu_w": 0.5366379310344829, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09594488188976379, "calib/std_conf": 0.24510581705402895, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1589.0, "completions/max_terminated_length": 1589.0, "completions/mean_length": 397.60546875, "completions/mean_terminated_length": 399.16473388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.16, "grad_norm": 0.05126010254025459, "learning_rate": 1.3888888888888892e-06, "loss": -0.0044, "num_tokens": 30649980.0, "reward": 1.2144067287445068, "reward_std": 0.37564900517463684, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7248140573501587, "rewards/format_reward_step_strict": 0.98828125, "step": 150 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.47707560761124e-08, "aux_brier/mean_group_std": 0.1269075709654268, "aux_brier/mean_r": 0.7557008715614809, "aux_brier/n_active_tok": 179.375, "aux_brier/n_groups": 11.3125, "aux_brier/n_step_records": 44.84375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5144787644787645, "calib/avg_num_step_conf": 5.8828125, "calib/ece": 0.2334262948207172, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.1394422310756972, "calib/gap": 0.018050836550836724, "calib/mean_conf": 0.603625498007968, "calib/mu_c": 0.6136936936936938, "calib/mu_w": 0.595642857142857, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19741035856573713, "calib/std_conf": 0.26567212213803254, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2307.0, "completions/max_terminated_length": 2307.0, "completions/mean_length": 434.859375, "completions/mean_terminated_length": 440.0158386230469, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.16106666666666666, "grad_norm": 0.05654248967766762, "learning_rate": 1.3611111111111112e-06, "loss": 0.0206, "num_tokens": 30868328.0, "reward": 1.0869810581207275, "reward_std": 0.2987477779388428, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6526120901107788, "rewards/format_reward_step_strict": 0.98046875, "step": 151 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.443325288620235e-08, "aux_brier/mean_group_std": 0.11533839896068751, "aux_brier/mean_r": 0.7959775638730627, "aux_brier/n_active_tok": 173.125, "aux_brier/n_groups": 11.03125, "aux_brier/n_step_records": 43.28125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.591305753070459, "calib/avg_num_step_conf": 5.60546875, "calib/ece": 0.13680120481927716, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.10441767068273092, "calib/gap": 0.07706929541047192, "calib/mean_conf": 0.6011907630522089, "calib/mu_c": 0.638023076923077, "calib/mu_w": 0.5609537815126051, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1079518072289157, "calib/std_conf": 0.23542918331516563, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 444.10546875, "completions/mean_terminated_length": 447.60235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.16213333333333332, "grad_norm": 0.14546184241771698, "learning_rate": 1.3333333333333334e-06, "loss": 0.0483, "num_tokens": 31087411.0, "reward": 1.1604429483413696, "reward_std": 0.3661574125289917, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.704271674156189, "rewards/format_reward_step_strict": 0.953125, "step": 152 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.0082205215720137e-07, "aux_brier/mean_group_std": 0.13697361324819832, "aux_brier/mean_r": 0.7917897499822438, "aux_brier/n_active_tok": 176.875, "aux_brier/n_groups": 10.375, "aux_brier/n_step_records": 44.21875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5713698456868201, "calib/avg_num_step_conf": 5.52734375, "calib/ece": 0.22611111111111115, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.15079365079365079, "calib/gap": 0.06230457880091078, "calib/mean_conf": 0.6150793650793651, "calib/mu_c": 0.6442537313432837, "calib/mu_w": 0.5819491525423729, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15472222222222226, "calib/std_conf": 0.2606912241256019, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1835.0, "completions/max_terminated_length": 1835.0, "completions/mean_length": 440.34765625, "completions/mean_terminated_length": 443.8149719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.1632, "grad_norm": 0.1055821031332016, "learning_rate": 1.3055555555555556e-06, "loss": 0.001, "num_tokens": 31307460.0, "reward": 1.187672734260559, "reward_std": 0.28842902183532715, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6960035562515259, "rewards/format_reward_step_strict": 0.98046875, "step": 153 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.21805267692066e-09, "aux_brier/mean_group_std": 0.12686975593948335, "aux_brier/mean_r": 0.7998879815074778, "aux_brier/n_active_tok": 156.0, "aux_brier/n_groups": 8.46875, "aux_brier/n_step_records": 39.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7001890359168244, "calib/avg_num_step_conf": 5.0390625, "calib/ece": 0.19521739130434787, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1225296442687747, "calib/gap": 0.16552173913043455, "calib/mean_conf": 0.6102371541501976, "calib/mu_c": 0.7005217391304347, "calib/mu_w": 0.5350000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.17545454545454545, "calib/std_conf": 0.24428799315522298, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2790.0, "completions/max_terminated_length": 2790.0, "completions/mean_length": 395.45703125, "completions/mean_terminated_length": 397.00787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.16426666666666667, "grad_norm": 0.0952596366405487, "learning_rate": 1.2777777777777779e-06, "loss": 0.0053, "num_tokens": 31513137.0, "reward": 1.1326243877410889, "reward_std": 0.30417975783348083, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.7414347529411316, "rewards/format_reward_step_strict": 0.98828125, "step": 154 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.4763500017808155e-08, "aux_brier/mean_group_std": 0.13779846848636926, "aux_brier/mean_r": 0.7960159172203838, "aux_brier/n_active_tok": 164.125, "aux_brier/n_groups": 9.125, "aux_brier/n_step_records": 41.03125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6894146948941469, "calib/avg_num_step_conf": 5.16015625, "calib/ece": 0.15851562500000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.109375, "calib/gap": 0.17920797011207995, "calib/mean_conf": 0.54625, "calib/mu_c": 0.6484545454545456, "calib/mu_w": 0.46924657534246567, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13753906250000003, "calib/std_conf": 0.26870203060267334, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 968.0, "completions/max_terminated_length": 968.0, "completions/mean_length": 383.57421875, "completions/mean_terminated_length": 385.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.16533333333333333, "grad_norm": 0.02583649568259716, "learning_rate": 1.25e-06, "loss": 0.0138, "num_tokens": 31718548.0, "reward": 1.112450122833252, "reward_std": 0.2871822714805603, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7544882893562317, "rewards/format_reward_step_strict": 0.98828125, "step": 155 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.829771050289921e-08, "aux_brier/mean_group_std": 0.12302485247484855, "aux_brier/mean_r": 0.7776026305040514, "aux_brier/n_active_tok": 176.25, "aux_brier/n_groups": 9.53125, "aux_brier/n_step_records": 44.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5847667407172327, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.23416666666666672, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.13095238095238096, "calib/gap": 0.07775880672802293, "calib/mean_conf": 0.6187698412698412, "calib/mu_c": 0.6610434782608696, "calib/mu_w": 0.5832846715328467, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19829365079365083, "calib/std_conf": 0.2517487933704288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 451.46484375, "completions/mean_terminated_length": 451.46484375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.1664, "grad_norm": 0.17125293612480164, "learning_rate": 1.2222222222222223e-06, "loss": 0.0078, "num_tokens": 31938883.0, "reward": 1.1118985414505005, "reward_std": 0.33926159143447876, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.689781665802002, "rewards/format_reward_step_strict": 0.98046875, "step": 156 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.691566070023299e-08, "aux_brier/mean_group_std": 0.13706306788561623, "aux_brier/mean_r": 0.7464380565734717, "aux_brier/n_active_tok": 192.5, "aux_brier/n_groups": 10.46875, "aux_brier/n_step_records": 48.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7284209846233364, "calib/avg_num_step_conf": 6.03515625, "calib/ece": 0.1257768924302789, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.1593625498007968, "calib/gap": 0.21101434293836407, "calib/mean_conf": 0.642589641434263, "calib/mu_c": 0.734225352112676, "calib/mu_w": 0.523211009174312, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10131474103585658, "calib/std_conf": 0.2567558619422429, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 440.9765625, "completions/mean_terminated_length": 446.20556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.16746666666666668, "grad_norm": 0.0412166602909565, "learning_rate": 1.1944444444444446e-06, "loss": 0.0342, "num_tokens": 32155501.0, "reward": 1.2317674160003662, "reward_std": 0.2891837954521179, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.770819902420044, "rewards/format_reward_step_strict": 0.96875, "step": 157 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.5143887812106858e-08, "aux_brier/mean_group_std": 0.11988775852216747, "aux_brier/mean_r": 0.7481809163657375, "aux_brier/n_active_tok": 173.0, "aux_brier/n_groups": 9.84375, "aux_brier/n_step_records": 43.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4912872942386831, "calib/avg_num_step_conf": 5.40625, "calib/ece": 0.21277777777777776, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.14285714285714285, "calib/gap": -0.005833333333333468, "calib/mean_conf": 0.6644444444444444, "calib/mu_c": 0.6619444444444444, "calib/mu_w": 0.6677777777777779, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1528968253968254, "calib/std_conf": 0.22775493497538915, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2218.0, "completions/max_terminated_length": 2218.0, "completions/mean_length": 407.0625, "completions/mean_terminated_length": 411.88934326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.16853333333333334, "grad_norm": 0.012716798111796379, "learning_rate": 1.1666666666666668e-06, "loss": 0.028, "num_tokens": 32364949.0, "reward": 1.222691535949707, "reward_std": 0.3177594840526581, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6798285245895386, "rewards/format_reward_step_strict": 0.98046875, "step": 158 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0770565500584084e-07, "aux_brier/mean_group_std": 0.15063667122630234, "aux_brier/mean_r": 0.8045148502922331, "aux_brier/n_active_tok": 175.0, "aux_brier/n_groups": 10.15625, "aux_brier/n_step_records": 43.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4885457854766837, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.23204724409448824, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1141732283464567, "calib/gap": -0.01245841963220995, "calib/mean_conf": 0.5465354330708662, "calib/mu_c": 0.5409929078014184, "calib/mu_w": 0.5534513274336283, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11173228346456693, "calib/std_conf": 0.2764464549334733, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 427.27734375, "completions/mean_terminated_length": 427.27734375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.1696, "grad_norm": 0.025821218267083168, "learning_rate": 1.138888888888889e-06, "loss": 0.094, "num_tokens": 32579116.0, "reward": 1.2105845212936401, "reward_std": 0.2866003215312958, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6626508235931396, "rewards/format_reward_step_strict": 0.98828125, "step": 159 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.182264056577626e-08, "aux_brier/mean_group_std": 0.13987717889225337, "aux_brier/mean_r": 0.7918607675799973, "aux_brier/n_active_tok": 185.75, "aux_brier/n_groups": 11.53125, "aux_brier/n_step_records": 46.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6507251812953239, "calib/avg_num_step_conf": 5.88671875, "calib/ece": 0.14086956521739133, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.12648221343873517, "calib/gap": 0.13403413353338334, "calib/mean_conf": 0.5982608695652173, "calib/mu_c": 0.6639534883720931, "calib/mu_w": 0.5299193548387098, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11462450592885376, "calib/std_conf": 0.2588606442332258, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 418.76953125, "completions/mean_terminated_length": 420.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.17066666666666666, "grad_norm": 0.06163140758872032, "learning_rate": 1.111111111111111e-06, "loss": 0.0111, "num_tokens": 32791161.0, "reward": 1.1794862747192383, "reward_std": 0.30159661173820496, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7335702776908875, "rewards/format_reward_step_strict": 0.984375, "step": 160 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.472216469566856e-08, "aux_brier/mean_group_std": 0.10883649614578836, "aux_brier/mean_r": 0.7972156714269121, "aux_brier/n_active_tok": 168.5, "aux_brier/n_groups": 9.375, "aux_brier/n_step_records": 42.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6139204545454546, "calib/avg_num_step_conf": 5.28515625, "calib/ece": 0.13585937500000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.078125, "calib/gap": 0.09940909090909089, "calib/mean_conf": 0.6192187499999999, "calib/mu_c": 0.6502840909090909, "calib/mu_w": 0.550875, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03378906250000001, "calib/std_conf": 0.23450282972373168, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 399.640625, "completions/mean_terminated_length": 401.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.17173333333333332, "grad_norm": 0.027240252122282982, "learning_rate": 1.0833333333333335e-06, "loss": -0.0209, "num_tokens": 32997389.0, "reward": 1.371936559677124, "reward_std": 0.2640629708766937, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.7611836194992065, "rewards/format_reward_step_strict": 0.98828125, "step": 161 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.686420269714667e-08, "aux_brier/mean_group_std": 0.14525228010587657, "aux_brier/mean_r": 0.7886401615083652, "aux_brier/n_active_tok": 174.625, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 43.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5500986842105263, "calib/avg_num_step_conf": 5.48828125, "calib/ece": 0.22090196078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.10588235294117647, "calib/gap": 0.04642434210526314, "calib/mean_conf": 0.582392156862745, "calib/mu_c": 0.5996875, "calib/mu_w": 0.5532631578947369, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.08792156862745101, "calib/std_conf": 0.2567980879556951, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1138.0, "completions/max_terminated_length": 1138.0, "completions/mean_length": 396.4453125, "completions/mean_terminated_length": 398.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1728, "grad_norm": 0.04163028672337532, "learning_rate": 1.0555555555555557e-06, "loss": -0.0095, "num_tokens": 33203023.0, "reward": 1.2980287075042725, "reward_std": 0.3110371232032776, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7155523300170898, "rewards/format_reward_step_strict": 0.98828125, "step": 162 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2115081460956079e-07, "aux_brier/mean_group_std": 0.1383938187367926, "aux_brier/mean_r": 0.8091868236959634, "aux_brier/n_active_tok": 180.125, "aux_brier/n_groups": 11.0625, "aux_brier/n_step_records": 45.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6298390604610701, "calib/avg_num_step_conf": 5.8125, "calib/ece": 0.1850787401574803, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08267716535433071, "calib/gap": 0.11352078543466115, "calib/mean_conf": 0.5679133858267716, "calib/mu_c": 0.6273553719008266, "calib/mu_w": 0.5138345864661654, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13830708661417324, "calib/std_conf": 0.25454911577238815, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 445.109375, "completions/mean_terminated_length": 446.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.17386666666666667, "grad_norm": 0.09340494871139526, "learning_rate": 1.0277777777777777e-06, "loss": 0.0476, "num_tokens": 33421803.0, "reward": 1.1488676071166992, "reward_std": 0.3309406638145447, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.728283166885376, "rewards/format_reward_step_strict": 0.98828125, "step": 163 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.2348200429410667e-07, "aux_brier/mean_group_std": 0.11400020591540719, "aux_brier/mean_r": 0.8364965513376516, "aux_brier/n_active_tok": 180.375, "aux_brier/n_groups": 10.09375, "aux_brier/n_step_records": 45.09375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6069170215511679, "calib/avg_num_step_conf": 5.7578125, "calib/ece": 0.1572690763052209, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.08032128514056225, "calib/gap": 0.09653116531165301, "calib/mean_conf": 0.5254618473895583, "calib/mu_c": 0.5743089430894308, "calib/mu_w": 0.4777777777777778, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09437751004016065, "calib/std_conf": 0.2596659259947126, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 470.60546875, "completions/mean_terminated_length": 474.31103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.17493333333333333, "grad_norm": 0.043340470641851425, "learning_rate": 1.0000000000000002e-06, "loss": 0.0193, "num_tokens": 33648414.0, "reward": 1.1420526504516602, "reward_std": 0.32285794615745544, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7088354825973511, "rewards/format_reward_step_strict": 0.96875, "step": 164 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.4346702933174544e-08, "aux_brier/mean_group_std": 0.12499093570922237, "aux_brier/mean_r": 0.8435749692709568, "aux_brier/n_active_tok": 183.75, "aux_brier/n_groups": 10.53125, "aux_brier/n_step_records": 45.9375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6282643508253264, "calib/avg_num_step_conf": 5.7421875, "calib/ece": 0.15396078431372548, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08235294117647059, "calib/gap": 0.11300258684405035, "calib/mean_conf": 0.5410980392156863, "calib/mu_c": 0.5995934959349595, "calib/mu_w": 0.48659090909090913, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10635294117647058, "calib/std_conf": 0.2587321466722786, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 445.265625, "completions/mean_terminated_length": 445.265625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.176, "grad_norm": 0.11459824442863464, "learning_rate": 9.722222222222224e-07, "loss": 0.016, "num_tokens": 33867978.0, "reward": 1.15895414352417, "reward_std": 0.27036812901496887, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7295668125152588, "rewards/format_reward_step_strict": 0.9921875, "step": 165 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0974192347912304e-07, "aux_brier/mean_group_std": 0.13252428335265373, "aux_brier/mean_r": 0.8068924695729875, "aux_brier/n_active_tok": 190.5, "aux_brier/n_groups": 11.0, "aux_brier/n_step_records": 47.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5361037234042553, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.2381102362204725, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.10236220472440945, "calib/gap": 0.029906914893617076, "calib/mean_conf": 0.5633070866141732, "calib/mu_c": 0.574375, "calib/mu_w": 0.5444680851063829, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08574803149606303, "calib/std_conf": 0.27159938978660664, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2571.0, "completions/max_terminated_length": 2571.0, "completions/mean_length": 464.1484375, "completions/mean_terminated_length": 465.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.17706666666666668, "grad_norm": 0.06173868477344513, "learning_rate": 9.444444444444445e-07, "loss": 0.0188, "num_tokens": 34092984.0, "reward": 1.2890897989273071, "reward_std": 0.30467140674591064, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6954214572906494, "rewards/format_reward_step_strict": 0.98046875, "step": 166 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.744194001538027e-08, "aux_brier/mean_group_std": 0.12886058695185243, "aux_brier/mean_r": 0.7738747577349124, "aux_brier/n_active_tok": 199.875, "aux_brier/n_groups": 12.5625, "aux_brier/n_step_records": 49.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6333892617449665, "calib/avg_num_step_conf": 6.3359375, "calib/ece": 0.15158102766798423, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.13438735177865613, "calib/gap": 0.12027684563758378, "calib/mean_conf": 0.6145849802371541, "calib/mu_c": 0.6640268456375839, "calib/mu_w": 0.5437500000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08861660079051388, "calib/std_conf": 0.27050882681911687, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 455.6796875, "completions/mean_terminated_length": 457.4667053222656, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.17813333333333334, "grad_norm": 0.01448934618383646, "learning_rate": 9.166666666666666e-07, "loss": 0.0383, "num_tokens": 34315246.0, "reward": 1.2537157535552979, "reward_std": 0.3421841859817505, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7336128950119019, "rewards/format_reward_step_strict": 0.9765625, "step": 167 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.117549296996705e-08, "aux_brier/mean_group_std": 0.13716370659561095, "aux_brier/mean_r": 0.8212303595201175, "aux_brier/n_active_tok": 192.875, "aux_brier/n_groups": 11.71875, "aux_brier/n_step_records": 48.21875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.603968253968254, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.15282868525896415, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.13147410358565736, "calib/gap": 0.09974031746031747, "calib/mean_conf": 0.542788844621514, "calib/mu_c": 0.5924603174603175, "calib/mu_w": 0.49272, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09681274900398407, "calib/std_conf": 0.2743674202776643, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 470.58203125, "completions/mean_terminated_length": 472.427490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.1792, "grad_norm": 0.07744412124156952, "learning_rate": 8.88888888888889e-07, "loss": 0.0483, "num_tokens": 34540387.0, "reward": 1.1635310649871826, "reward_std": 0.3672263026237488, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7088117003440857, "rewards/format_reward_step_strict": 0.98046875, "step": 168 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.701656303844452e-08, "aux_brier/mean_group_std": 0.12960164289584802, "aux_brier/mean_r": 0.8424348734402805, "aux_brier/n_active_tok": 179.125, "aux_brier/n_groups": 9.9375, "aux_brier/n_step_records": 44.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6354812701562889, "calib/avg_num_step_conf": 5.62890625, "calib/ece": 0.15974117647058822, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.06666666666666667, "calib/gap": 0.11835313817911192, "calib/mean_conf": 0.49994509803921566, "calib/mu_c": 0.5537841726618705, "calib/mu_w": 0.4354310344827586, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05729411764705882, "calib/std_conf": 0.2630103049600651, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1052.0, "completions/max_terminated_length": 1052.0, "completions/mean_length": 437.9296875, "completions/mean_terminated_length": 439.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.18026666666666666, "grad_norm": 0.25850746035575867, "learning_rate": 8.611111111111112e-07, "loss": -0.0109, "num_tokens": 34756681.0, "reward": 1.2086318731307983, "reward_std": 0.24466845393180847, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7329649329185486, "rewards/format_reward_step_strict": 0.96484375, "step": 169 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.069467319731899e-09, "aux_brier/mean_group_std": 0.1441439193861494, "aux_brier/mean_r": 0.79168639335128, "aux_brier/n_active_tok": 202.125, "aux_brier/n_groups": 11.65625, "aux_brier/n_step_records": 50.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7347021943573668, "calib/avg_num_step_conf": 6.31640625, "calib/ece": 0.12321568627450985, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.17647058823529413, "calib/gap": 0.25299686520376174, "calib/mean_conf": 0.5865882352941176, "calib/mu_c": 0.6957241379310345, "calib/mu_w": 0.44272727272727275, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07058823529411772, "calib/std_conf": 0.30367250009273494, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 458.96484375, "completions/mean_terminated_length": 458.96484375, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.18133333333333335, "grad_norm": 0.03180493786931038, "learning_rate": 8.333333333333333e-07, "loss": 0.0207, "num_tokens": 34978328.0, "reward": 1.2545132637023926, "reward_std": 0.26771682500839233, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7758660316467285, "rewards/format_reward_step_strict": 0.98828125, "step": 170 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.4276413096723246e-08, "aux_brier/mean_group_std": 0.13350658076977476, "aux_brier/mean_r": 0.8447229648423799, "aux_brier/n_active_tok": 167.875, "aux_brier/n_groups": 8.9375, "aux_brier/n_step_records": 41.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6285831285831286, "calib/avg_num_step_conf": 5.28515625, "calib/ece": 0.16425196850393703, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07086614173228346, "calib/gap": 0.10505890505890503, "calib/mean_conf": 0.4828346456692914, "calib/mu_c": 0.541981981981982, "calib/mu_w": 0.43692307692307697, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10503937007874017, "calib/std_conf": 0.26861088455908594, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 434.1640625, "completions/mean_terminated_length": 434.1640625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.1824, "grad_norm": 0.056708645075559616, "learning_rate": 8.055555555555557e-07, "loss": 0.0552, "num_tokens": 35196370.0, "reward": 1.1091587543487549, "reward_std": 0.3410470485687256, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7256976366043091, "rewards/format_reward_step_strict": 0.98828125, "step": 171 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.3982179821529e-08, "aux_brier/mean_group_std": 0.12698731212937597, "aux_brier/mean_r": 0.8196424473396564, "aux_brier/n_active_tok": 171.5, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 42.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5888429752066116, "calib/avg_num_step_conf": 5.38671875, "calib/ece": 0.16628458498023713, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.09486166007905138, "calib/gap": 0.08337662337662338, "calib/mean_conf": 0.526205533596838, "calib/mu_c": 0.5588311688311689, "calib/mu_w": 0.4754545454545455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.041897233201581036, "calib/std_conf": 0.2673716108888531, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 415.96875, "completions/mean_terminated_length": 417.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.18346666666666667, "grad_norm": 0.015460886992514133, "learning_rate": 7.777777777777779e-07, "loss": -0.0134, "num_tokens": 35406210.0, "reward": 1.2659008502960205, "reward_std": 0.2815476655960083, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7120406627655029, "rewards/format_reward_step_strict": 0.97265625, "step": 172 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.4552024610314795e-07, "aux_brier/mean_group_std": 0.14090398843318633, "aux_brier/mean_r": 0.7598359783819812, "aux_brier/n_active_tok": 206.5, "aux_brier/n_groups": 11.9375, "aux_brier/n_step_records": 51.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6527679547354208, "calib/avg_num_step_conf": 6.453125, "calib/ece": 0.1622834645669291, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.17716535433070865, "calib/gap": 0.16263357551597746, "calib/mean_conf": 0.6117322834645669, "calib/mu_c": 0.6776821192052979, "calib/mu_w": 0.5150485436893204, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08976377952755904, "calib/std_conf": 0.2863621804753498, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 452.39453125, "completions/mean_terminated_length": 452.39453125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.18453333333333333, "grad_norm": 0.1217033714056015, "learning_rate": 7.5e-07, "loss": 0.0204, "num_tokens": 35625183.0, "reward": 1.2712502479553223, "reward_std": 0.30118289589881897, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7490636110305786, "rewards/format_reward_step_strict": 0.98828125, "step": 173 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0695314348052598e-07, "aux_brier/mean_group_std": 0.11057211656513695, "aux_brier/mean_r": 0.8604450335270105, "aux_brier/n_active_tok": 180.25, "aux_brier/n_groups": 10.84375, "aux_brier/n_step_records": 45.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6464091884387226, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.15189723320158108, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": 0.1402713618578822, "calib/mean_conf": 0.5064426877470357, "calib/mu_c": 0.5835087719298246, "calib/mu_w": 0.44323741007194245, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10387351778656129, "calib/std_conf": 0.25747940873940156, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2474.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 474.03125, "completions/mean_terminated_length": 475.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.1856, "grad_norm": 0.15364979207515717, "learning_rate": 7.222222222222222e-07, "loss": -0.0155, "num_tokens": 35850767.0, "reward": 1.123063087463379, "reward_std": 0.37715062499046326, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.742252767086029, "rewards/format_reward_step_strict": 0.984375, "step": 174 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.6226778406594e-08, "aux_brier/mean_group_std": 0.1201758730907, "aux_brier/mean_r": 0.8374527627666861, "aux_brier/n_active_tok": 188.25, "aux_brier/n_groups": 11.375, "aux_brier/n_step_records": 47.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5755693581780538, "calib/avg_num_step_conf": 5.921875, "calib/ece": 0.2318110236220472, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08267716535433071, "calib/gap": 0.08182728912041665, "calib/mean_conf": 0.46716535433070866, "calib/mu_c": 0.5190322580645161, "calib/mu_w": 0.4372049689440995, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16641732283464564, "calib/std_conf": 0.29166612785556756, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2135.0, "completions/max_terminated_length": 2135.0, "completions/mean_length": 467.30078125, "completions/mean_terminated_length": 467.30078125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.18666666666666668, "grad_norm": 0.02424977533519268, "learning_rate": 6.944444444444446e-07, "loss": 0.0268, "num_tokens": 36076220.0, "reward": 1.0250205993652344, "reward_std": 0.298775851726532, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.7016448974609375, "rewards/format_reward_step_strict": 0.97265625, "step": 175 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.710879667057526e-08, "aux_brier/mean_group_std": 0.13257063657494714, "aux_brier/mean_r": 0.8193998864175998, "aux_brier/n_active_tok": 204.5, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 51.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6728689567430025, "calib/avg_num_step_conf": 6.390625, "calib/ece": 0.13131474103585664, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.11155378486055777, "calib/gap": 0.17249809160305346, "calib/mean_conf": 0.5297211155378485, "calib/mu_c": 0.61975, "calib/mu_w": 0.44725190839694656, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09147410358565741, "calib/std_conf": 0.27883565879186795, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 479.3671875, "completions/mean_terminated_length": 479.3671875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.18773333333333334, "grad_norm": 0.07288090884685516, "learning_rate": 6.666666666666667e-07, "loss": 0.0608, "num_tokens": 36303002.0, "reward": 1.1420562267303467, "reward_std": 0.26816827058792114, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7401000261306763, "rewards/format_reward_step_strict": 0.9765625, "step": 176 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.3118862211311004e-07, "aux_brier/mean_group_std": 0.12202760582518828, "aux_brier/mean_r": 0.8576158931783181, "aux_brier/n_active_tok": 192.375, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 48.09375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5516837301097817, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.22098039215686271, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.054901960784313725, "calib/gap": 0.05024608363142952, "calib/mean_conf": 0.49690196078431365, "calib/mu_c": 0.5233057851239669, "calib/mu_w": 0.47305970149253734, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12168627450980393, "calib/std_conf": 0.2691267836586129, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 444.20703125, "completions/mean_terminated_length": 445.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.1888, "grad_norm": 0.08046219497919083, "learning_rate": 6.388888888888889e-07, "loss": 0.0143, "num_tokens": 36520551.0, "reward": 1.145711064338684, "reward_std": 0.2732439637184143, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7000316381454468, "rewards/format_reward_step_strict": 0.99609375, "step": 177 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.932638351748622e-09, "aux_brier/mean_group_std": 0.12115816613451483, "aux_brier/mean_r": 0.8684018938381981, "aux_brier/n_active_tok": 180.25, "aux_brier/n_groups": 10.75, "aux_brier/n_step_records": 45.0625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7313086999505685, "calib/avg_num_step_conf": 5.703125, "calib/ece": 0.10501960784313727, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": 0.19745798319327718, "calib/mean_conf": 0.4632941176470588, "calib/mu_c": 0.5554411764705882, "calib/mu_w": 0.35798319327731104, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.017490196078431403, "calib/std_conf": 0.2523492206865006, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2440.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 446.4609375, "completions/mean_terminated_length": 446.4609375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.18986666666666666, "grad_norm": 0.07407964766025543, "learning_rate": 6.111111111111112e-07, "loss": 0.0421, "num_tokens": 36740917.0, "reward": 1.2188652753829956, "reward_std": 0.2631143033504486, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7738984823226929, "rewards/format_reward_step_strict": 0.98828125, "step": 178 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.7091194815254127e-08, "aux_brier/mean_group_std": 0.13085583064637563, "aux_brier/mean_r": 0.8254122367004574, "aux_brier/n_active_tok": 190.875, "aux_brier/n_groups": 11.65625, "aux_brier/n_step_records": 47.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5959916939340549, "calib/avg_num_step_conf": 6.00390625, "calib/ece": 0.1741501976284585, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.1067193675889328, "calib/gap": 0.08586521520261775, "calib/mean_conf": 0.5053754940711463, "calib/mu_c": 0.5447445255474452, "calib/mu_w": 0.4588793103448275, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06901185770750987, "calib/std_conf": 0.2716039994587508, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2760.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 471.39453125, "completions/mean_terminated_length": 471.39453125, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.19093333333333334, "grad_norm": 0.08145998418331146, "learning_rate": 5.833333333333334e-07, "loss": 0.0602, "num_tokens": 36967858.0, "reward": 1.1990129947662354, "reward_std": 0.3276754319667816, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.710114061832428, "rewards/format_reward_step_strict": 0.97265625, "step": 179 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.524062489716599e-08, "aux_brier/mean_group_std": 0.13352768024902548, "aux_brier/mean_r": 0.8255162120203959, "aux_brier/n_active_tok": 205.125, "aux_brier/n_groups": 11.28125, "aux_brier/n_step_records": 51.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6434640522875816, "calib/avg_num_step_conf": 6.4296875, "calib/ece": 0.11480158730158735, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.08333333333333333, "calib/gap": 0.12870980392156856, "calib/mean_conf": 0.565436507936508, "calib/mu_c": 0.6175333333333333, "calib/mu_w": 0.4888235294117647, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.042500000000000024, "calib/std_conf": 0.25846964133640027, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 512.1640625, "completions/mean_terminated_length": 512.1640625, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 0.192, "grad_norm": 0.019185282289981842, "learning_rate": 5.555555555555555e-07, "loss": 0.0737, "num_tokens": 37202828.0, "reward": 1.263530969619751, "reward_std": 0.252987265586853, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7416238188743591, "rewards/format_reward_step_strict": 0.984375, "step": 180 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.2639901064991363e-10, "aux_brier/mean_group_std": 0.13063977231238041, "aux_brier/mean_r": 0.8416393269425503, "aux_brier/n_active_tok": 184.75, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 46.1875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6182467532467533, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.18424000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.084, "calib/gap": 0.10808441558441567, "calib/mean_conf": 0.47120000000000006, "calib/mu_c": 0.5317272727272728, "calib/mu_w": 0.42364285714285715, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10772000000000002, "calib/std_conf": 0.27343035676383853, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 455.5, "completions/mean_terminated_length": 457.28631591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.19306666666666666, "grad_norm": 0.018800193443894386, "learning_rate": 5.277777777777779e-07, "loss": 0.0728, "num_tokens": 37425700.0, "reward": 1.091705560684204, "reward_std": 0.3459656238555908, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7105726599693298, "rewards/format_reward_step_strict": 0.96875, "step": 181 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.713614287920613e-09, "aux_brier/mean_group_std": 0.14413104201360125, "aux_brier/mean_r": 0.8129653799484661, "aux_brier/n_active_tok": 199.125, "aux_brier/n_groups": 11.34375, "aux_brier/n_step_records": 49.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6696347328740414, "calib/avg_num_step_conf": 6.22265625, "calib/ece": 0.12356862745098036, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.11764705882352941, "calib/gap": 0.1591940725334719, "calib/mean_conf": 0.5606666666666666, "calib/mu_c": 0.6218471337579617, "calib/mu_w": 0.46265306122448985, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03427450980392154, "calib/std_conf": 0.2644801535729797, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 447.87109375, "completions/mean_terminated_length": 449.6274719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.19413333333333332, "grad_norm": 0.0645512193441391, "learning_rate": 5.000000000000001e-07, "loss": -0.0039, "num_tokens": 37646515.0, "reward": 1.2991081476211548, "reward_std": 0.2522338628768921, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7589328289031982, "rewards/format_reward_step_strict": 0.9921875, "step": 182 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.7656256224316564e-08, "aux_brier/mean_group_std": 0.12085042887777117, "aux_brier/mean_r": 0.8576194004594221, "aux_brier/n_active_tok": 180.25, "aux_brier/n_groups": 11.53125, "aux_brier/n_step_records": 45.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5519468245967742, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.20027777777777775, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": 0.05053175403225807, "calib/mean_conf": 0.49384920634920637, "calib/mu_c": 0.5195161290322581, "calib/mu_w": 0.468984375, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10103174603174603, "calib/std_conf": 0.2710148935010217, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2652.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 487.875, "completions/mean_terminated_length": 487.875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.1952, "grad_norm": 0.13578976690769196, "learning_rate": 4.7222222222222226e-07, "loss": 0.035, "num_tokens": 37878091.0, "reward": 1.149288296699524, "reward_std": 0.33050286769866943, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6909034848213196, "rewards/format_reward_step_strict": 0.984375, "step": 183 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.248651426508321e-08, "aux_brier/mean_group_std": 0.13784666836150583, "aux_brier/mean_r": 0.8222724938785408, "aux_brier/n_active_tok": 195.125, "aux_brier/n_groups": 12.34375, "aux_brier/n_step_records": 48.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6505063115550008, "calib/avg_num_step_conf": 6.17578125, "calib/ece": 0.19458167330677292, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.11155378486055777, "calib/gap": 0.14484949368844502, "calib/mean_conf": 0.532589641434263, "calib/mu_c": 0.5839506172839506, "calib/mu_w": 0.43910112359550557, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04087649402390439, "calib/std_conf": 0.27776725569965094, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2872.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 458.30078125, "completions/mean_terminated_length": 463.7351989746094, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.19626666666666667, "grad_norm": 0.07469954341650009, "learning_rate": 4.444444444444445e-07, "loss": 0.0107, "num_tokens": 38100696.0, "reward": 1.3042383193969727, "reward_std": 0.29574257135391235, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7325785160064697, "rewards/format_reward_step_strict": 0.9765625, "step": 184 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.120681821959352e-11, "aux_brier/mean_group_std": 0.13905444224610794, "aux_brier/mean_r": 0.8207141758269085, "aux_brier/n_active_tok": 224.125, "aux_brier/n_groups": 16.3125, "aux_brier/n_step_records": 56.03125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6688735177865613, "calib/avg_num_step_conf": 7.2578125, "calib/ece": 0.15439516129032257, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.11290322580645161, "calib/gap": 0.17096837944664034, "calib/mean_conf": 0.5528629032258066, "calib/mu_c": 0.628695652173913, "calib/mu_w": 0.45772727272727265, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07540322580645162, "calib/std_conf": 0.2864019517717504, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2759.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 497.87890625, "completions/mean_terminated_length": 501.7992248535156, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.19733333333333333, "grad_norm": 0.06929118931293488, "learning_rate": 4.1666666666666667e-07, "loss": 0.0718, "num_tokens": 38335073.0, "reward": 1.198161244392395, "reward_std": 0.33943843841552734, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7301449775695801, "rewards/format_reward_step_strict": 0.953125, "step": 185 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.6373698278933126e-08, "aux_brier/mean_group_std": 0.13836463521961348, "aux_brier/mean_r": 0.818759891310837, "aux_brier/n_active_tok": 198.625, "aux_brier/n_groups": 10.25, "aux_brier/n_step_records": 49.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6347631645250693, "calib/avg_num_step_conf": 6.29296875, "calib/ece": 0.16996078431372552, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.11764705882352941, "calib/gap": 0.13632653061224498, "calib/mean_conf": 0.5452549019607844, "calib/mu_c": 0.6029931972789117, "calib/mu_w": 0.4666666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06937254901960788, "calib/std_conf": 0.2880683363327865, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1518.0, "completions/max_terminated_length": 1518.0, "completions/mean_length": 460.73828125, "completions/mean_terminated_length": 462.5451354980469, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.1984, "grad_norm": 0.035251058638095856, "learning_rate": 3.8888888888888895e-07, "loss": -0.0145, "num_tokens": 38558062.0, "reward": 1.2561593055725098, "reward_std": 0.25657111406326294, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7355742454528809, "rewards/format_reward_step_strict": 0.99609375, "step": 186 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.1440203027719456e-08, "aux_brier/mean_group_std": 0.15637971338084253, "aux_brier/mean_r": 0.8131682977508826, "aux_brier/n_active_tok": 241.75, "aux_brier/n_groups": 20.59375, "aux_brier/n_step_records": 60.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5420480000000001, "calib/avg_num_step_conf": 7.62109375, "calib/ece": 0.21943999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.144, "calib/gap": 0.04031999999999991, "calib/mean_conf": 0.5599200000000001, "calib/mu_c": 0.58008, "calib/mu_w": 0.5397600000000001, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13968000000000003, "calib/std_conf": 0.2832948880583623, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 520.73828125, "completions/mean_terminated_length": 522.7803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.19946666666666665, "grad_norm": 0.0872163325548172, "learning_rate": 3.611111111111111e-07, "loss": 0.0568, "num_tokens": 38792915.0, "reward": 1.1417391300201416, "reward_std": 0.37993553280830383, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.668519139289856, "rewards/format_reward_step_strict": 0.97265625, "step": 187 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.4688578898212086e-08, "aux_brier/mean_group_std": 0.15248406147803498, "aux_brier/mean_r": 0.7917357955861555, "aux_brier/n_active_tok": 209.125, "aux_brier/n_groups": 12.53125, "aux_brier/n_step_records": 52.28125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5558268664704308, "calib/avg_num_step_conf": 6.921875, "calib/ece": 0.1791164658634538, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.13253012048192772, "calib/gap": 0.05036794219962537, "calib/mean_conf": 0.5883534136546185, "calib/mu_c": 0.6087837837837837, "calib/mu_w": 0.5584158415841584, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.08654618473895584, "calib/std_conf": 0.25686732546190727, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2373.0, "completions/max_terminated_length": 2373.0, "completions/mean_length": 477.734375, "completions/mean_terminated_length": 483.39923095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.20053333333333334, "grad_norm": 0.017694227397441864, "learning_rate": 3.3333333333333335e-07, "loss": 0.0407, "num_tokens": 39019287.0, "reward": 1.2328319549560547, "reward_std": 0.30562183260917664, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6969531774520874, "rewards/format_reward_step_strict": 0.9609375, "step": 188 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.837181542289738e-08, "aux_brier/mean_group_std": 0.15039783826741232, "aux_brier/mean_r": 0.8286980934908081, "aux_brier/n_active_tok": 177.5, "aux_brier/n_groups": 9.375, "aux_brier/n_step_records": 44.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.686502450672364, "calib/avg_num_step_conf": 5.578125, "calib/ece": 0.14486274509803923, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07450980392156863, "calib/gap": 0.18592748523312808, "calib/mean_conf": 0.5133333333333333, "calib/mu_c": 0.5928082191780821, "calib/mu_w": 0.40688073394495405, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04282352941176474, "calib/std_conf": 0.27847812366409885, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 410.68359375, "completions/mean_terminated_length": 412.29412841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.2016, "grad_norm": 0.08387917280197144, "learning_rate": 3.055555555555556e-07, "loss": 0.0142, "num_tokens": 39232190.0, "reward": 1.2589154243469238, "reward_std": 0.25592151284217834, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7622241973876953, "rewards/format_reward_step_strict": 0.99609375, "step": 189 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.975207943900493e-08, "aux_brier/mean_group_std": 0.13552512342297607, "aux_brier/mean_r": 0.8317818418564614, "aux_brier/n_active_tok": 210.0, "aux_brier/n_groups": 13.34375, "aux_brier/n_step_records": 52.5, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6978187042842215, "calib/avg_num_step_conf": 6.5625, "calib/ece": 0.11016129032258075, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.13709677419354838, "calib/gap": 0.19750261233019856, "calib/mean_conf": 0.5082258064516129, "calib/mu_c": 0.6006060606060606, "calib/mu_w": 0.40310344827586203, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04306451612903234, "calib/std_conf": 0.2801599439119317, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2950.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 524.015625, "completions/mean_terminated_length": 528.1417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.20266666666666666, "grad_norm": 0.059735845774412155, "learning_rate": 2.7777777777777776e-07, "loss": 0.0617, "num_tokens": 39471946.0, "reward": 1.1839261054992676, "reward_std": 0.32794398069381714, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7435168027877808, "rewards/format_reward_step_strict": 0.96484375, "step": 190 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.356232853943489e-08, "aux_brier/mean_group_std": 0.1400925439453117, "aux_brier/mean_r": 0.8067248661498898, "aux_brier/n_active_tok": 201.375, "aux_brier/n_groups": 10.8125, "aux_brier/n_step_records": 50.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5943521805273833, "calib/avg_num_step_conf": 6.38671875, "calib/ece": 0.22932539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.15079365079365079, "calib/gap": 0.09945233265720066, "calib/mean_conf": 0.5721031746031746, "calib/mu_c": 0.6257758620689654, "calib/mu_w": 0.5263235294117647, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17055555555555557, "calib/std_conf": 0.2983844417346303, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1674.0, "completions/max_terminated_length": 1674.0, "completions/mean_length": 441.0390625, "completions/mean_terminated_length": 446.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.20373333333333332, "grad_norm": 0.02101707272231579, "learning_rate": 2.5000000000000004e-07, "loss": -0.0177, "num_tokens": 39689020.0, "reward": 1.1145371198654175, "reward_std": 0.2624858617782593, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6847109198570251, "rewards/format_reward_step_strict": 0.98046875, "step": 191 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.586907241867834e-09, "aux_brier/mean_group_std": 0.13470038081018282, "aux_brier/mean_r": 0.8196568557086462, "aux_brier/n_active_tok": 199.5, "aux_brier/n_groups": 14.375, "aux_brier/n_step_records": 49.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6477460317460317, "calib/avg_num_step_conf": 6.40234375, "calib/ece": 0.1749402390438246, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.09561752988047809, "calib/gap": 0.14562031746031734, "calib/mean_conf": 0.49494023904382467, "calib/mu_c": 0.5674603174603173, "calib/mu_w": 0.42184, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0839442231075697, "calib/std_conf": 0.28622194953699065, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2280.0, "completions/max_terminated_length": 2280.0, "completions/mean_length": 516.08203125, "completions/mean_terminated_length": 520.1456909179688, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.2048, "grad_norm": 0.06061521917581558, "learning_rate": 2.2222222222222224e-07, "loss": 0.0706, "num_tokens": 39926113.0, "reward": 1.148072600364685, "reward_std": 0.32368919253349304, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7172901630401611, "rewards/format_reward_step_strict": 0.953125, "step": 192 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.200539272640306e-08, "aux_brier/mean_group_std": 0.1353109952018515, "aux_brier/mean_r": 0.8160838623355592, "aux_brier/n_active_tok": 180.5, "aux_brier/n_groups": 10.0625, "aux_brier/n_step_records": 45.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5372860255213197, "calib/avg_num_step_conf": 5.640625, "calib/ece": 0.21177165354330713, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.11023622047244094, "calib/gap": 0.03582508558979136, "calib/mean_conf": 0.5511417322834645, "calib/mu_c": 0.5679259259259258, "calib/mu_w": 0.5321008403361345, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11570866141732286, "calib/std_conf": 0.2668964889568082, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1614.0, "completions/max_terminated_length": 1614.0, "completions/mean_length": 441.7109375, "completions/mean_terminated_length": 445.18896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.20586666666666667, "grad_norm": 0.03532715514302254, "learning_rate": 1.9444444444444447e-07, "loss": -0.0115, "num_tokens": 40144903.0, "reward": 1.1935105323791504, "reward_std": 0.3720396161079407, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6881046295166016, "rewards/format_reward_step_strict": 0.98828125, "step": 193 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.981806809248537e-08, "aux_brier/mean_group_std": 0.12829936317939683, "aux_brier/mean_r": 0.8432791857705928, "aux_brier/n_active_tok": 179.375, "aux_brier/n_groups": 10.15625, "aux_brier/n_step_records": 44.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7160797677647356, "calib/avg_num_step_conf": 5.6953125, "calib/ece": 0.09490118577075095, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07905138339920949, "calib/gap": 0.20161996718414749, "calib/mean_conf": 0.5029644268774703, "calib/mu_c": 0.5938129496402879, "calib/mu_w": 0.39219298245614037, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.02422924901185769, "calib/std_conf": 0.2695221703600293, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2452.0, "completions/max_terminated_length": 2452.0, "completions/mean_length": 414.88671875, "completions/mean_terminated_length": 416.5137634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.20693333333333333, "grad_norm": 0.07502194494009018, "learning_rate": 1.6666666666666668e-07, "loss": -0.0167, "num_tokens": 40357058.0, "reward": 1.219529151916504, "reward_std": 0.25393715500831604, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7609288692474365, "rewards/format_reward_step_strict": 0.97265625, "step": 194 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.058440089693143e-08, "aux_brier/mean_group_std": 0.1539960502725514, "aux_brier/mean_r": 0.8259485195647986, "aux_brier/n_active_tok": 195.875, "aux_brier/n_groups": 12.1875, "aux_brier/n_step_records": 48.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5588293001962066, "calib/avg_num_step_conf": 6.16796875, "calib/ece": 0.20899598393574292, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.08433734939759036, "calib/gap": 0.05838848920863321, "calib/mean_conf": 0.5085943775100401, "calib/mu_c": 0.5343884892086331, "calib/mu_w": 0.4759999999999999, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.07967871485943773, "calib/std_conf": 0.2882073664443802, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2635.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 458.7421875, "completions/mean_terminated_length": 464.18182373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.208, "grad_norm": 0.01408204436302185, "learning_rate": 1.3888888888888888e-07, "loss": 0.0481, "num_tokens": 40580480.0, "reward": 1.1898844242095947, "reward_std": 0.330066442489624, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6736003756523132, "rewards/format_reward_step_strict": 0.95703125, "step": 195 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.894387193281723e-08, "aux_brier/mean_group_std": 0.13635845266434798, "aux_brier/mean_r": 0.8143593968249906, "aux_brier/n_active_tok": 170.75, "aux_brier/n_groups": 8.9375, "aux_brier/n_step_records": 42.6875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5740601503759399, "calib/avg_num_step_conf": 5.46875, "calib/ece": 0.19381889763779522, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.1141732283464567, "calib/gap": 0.06238345864661643, "calib/mean_conf": 0.5788582677165355, "calib/mu_c": 0.6068571428571428, "calib/mu_w": 0.5444736842105263, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11074803149606295, "calib/std_conf": 0.2641772914716226, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1830.0, "completions/max_terminated_length": 1830.0, "completions/mean_length": 392.921875, "completions/mean_terminated_length": 396.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.20906666666666668, "grad_norm": 0.029272418469190598, "learning_rate": 1.1111111111111112e-07, "loss": 0.0052, "num_tokens": 40783612.0, "reward": 1.217855453491211, "reward_std": 0.21489381790161133, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7073589563369751, "rewards/format_reward_step_strict": 0.98828125, "step": 196 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.242070534594578e-09, "aux_brier/mean_group_std": 0.13752268409516394, "aux_brier/mean_r": 0.8122654082982423, "aux_brier/n_active_tok": 195.875, "aux_brier/n_groups": 12.25, "aux_brier/n_step_records": 48.96875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6483347039473685, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.18508064516129033, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.07661290322580645, "calib/gap": 0.13848135964912278, "calib/mean_conf": 0.5431451612903225, "calib/mu_c": 0.6280208333333334, "calib/mu_w": 0.4895394736842106, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.17056451612903228, "calib/std_conf": 0.2660474337269877, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 491.66796875, "completions/mean_terminated_length": 497.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.21013333333333334, "grad_norm": 0.04839777201414108, "learning_rate": 8.333333333333334e-08, "loss": 0.0557, "num_tokens": 41014535.0, "reward": 1.031530499458313, "reward_std": 0.35210874676704407, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.7042469382286072, "rewards/format_reward_step_strict": 0.9609375, "step": 197 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1480705691679916e-08, "aux_brier/mean_group_std": 0.134179582540571, "aux_brier/mean_r": 0.821017030260576, "aux_brier/n_active_tok": 185.0, "aux_brier/n_groups": 10.03125, "aux_brier/n_step_records": 46.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5749704840613932, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.21648221343873514, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.11067193675889328, "calib/gap": 0.07393217893217896, "calib/mean_conf": 0.5276284584980238, "calib/mu_c": 0.5565584415584416, "calib/mu_w": 0.4826262626262626, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06770750988142296, "calib/std_conf": 0.27381059403089686, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 419.56640625, "completions/mean_terminated_length": 421.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.2112, "grad_norm": 0.050839271396398544, "learning_rate": 5.555555555555556e-08, "loss": 0.0199, "num_tokens": 41227328.0, "reward": 1.2744324207305908, "reward_std": 0.28782498836517334, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7071050405502319, "rewards/format_reward_step_strict": 0.984375, "step": 198 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.76849689509573e-08, "aux_brier/mean_group_std": 0.1566791763056614, "aux_brier/mean_r": 0.8025530939803857, "aux_brier/n_active_tok": 193.875, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 48.46875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6311512561512561, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.15795180722891572, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.11646586345381527, "calib/gap": 0.12969891219891216, "calib/mean_conf": 0.5814056224899599, "calib/mu_c": 0.6423484848484848, "calib/mu_w": 0.5126495726495727, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10461847389558235, "calib/std_conf": 0.27786050089992187, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2573.0, "completions/max_terminated_length": 2573.0, "completions/mean_length": 497.98046875, "completions/mean_terminated_length": 503.8854064941406, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.21226666666666666, "grad_norm": 0.0713457241654396, "learning_rate": 2.777777777777778e-08, "loss": 0.0314, "num_tokens": 41459011.0, "reward": 1.1808452606201172, "reward_std": 0.4092795252799988, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7155683040618896, "rewards/format_reward_step_strict": 0.97265625, "step": 199 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.708029370861013e-08, "aux_brier/mean_group_std": 0.134669445541197, "aux_brier/mean_r": 0.8316067381304841, "aux_brier/n_active_tok": 191.75, "aux_brier/n_groups": 10.875, "aux_brier/n_step_records": 47.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.787124183006536, "calib/avg_num_step_conf": 6.1015625, "calib/ece": 0.09166007905138342, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1067193675889328, "calib/gap": 0.26843137254901955, "calib/mean_conf": 0.5423320158102767, "calib/mu_c": 0.6484313725490196, "calib/mu_w": 0.38000000000000006, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.014624505928853771, "calib/std_conf": 0.26725159579916696, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2950.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 470.2109375, "completions/mean_terminated_length": 472.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.21333333333333335, "grad_norm": 0.03595075011253357, "learning_rate": 0.0, "loss": 0.0202, "num_tokens": 41687433.0, "reward": 1.292906641960144, "reward_std": 0.2238655984401703, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.8044394254684448, "rewards/format_reward_step_strict": 0.98828125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.0346094365126919, "train_runtime": 13274.6065, "train_samples_per_second": 3.857, "train_steps_per_second": 0.015 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 41687433, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }