Files
PureRL-1.5B-v6f-analysis-20…/trainer_state.json

10240 lines
396 KiB
JSON
Raw Normal View History

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/loss": 1.2309403103940628,
"aux_distill/mean_u": 0.40830236726303903,
"aux_distill/n_active_tok": 26.11764705882353,
"calib/answer_extract_rate": 0.140625,
"calib/auroc": 0.2954545454545454,
"calib/avg_num_step_conf": 0.43359375,
"calib/ece": 0.5410526315789472,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.06640625,
"calib/frac_conf_gt_0.9": 0.7368421052631579,
"calib/gap": -0.04511363636363652,
"calib/mean_conf": 0.9073684210526317,
"calib/mu_c": 0.88125,
"calib/mu_w": 0.9263636363636365,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.1484375,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.5136842105263156,
"calib/std_conf": 0.140069234957993,
"calib/step_conf_rate": 0.08984375,
"calib/step_q_c": 0.855,
"calib/step_q_c_n": 30.0,
"calib/step_q_gap": 0.07586419753086415,
"calib/step_q_w": 0.7791358024691358,
"calib/step_q_w_n": 81.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.10546875,
"completions/max_length": 3023.0,
"completions/max_terminated_length": 3023.0,
"completions/mean_length": 639.21875,
"completions/mean_terminated_length": 714.5851440429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.013296756893396378,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0803,
"num_tokens": 295032.0,
"reward": 0.06443203240633011,
"reward_std": 0.14603616297245026,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.03120781108736992,
"rewards/format_reward_step": 0.06640625,
"step": 1
},
{
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/loss": 1.2242749134699504,
"aux_distill/mean_u": 0.22649021059963792,
"aux_distill/n_active_tok": 21.77777777777778,
"calib/answer_extract_rate": 0.1171875,
"calib/auroc": 0.45535714285714285,
"calib/avg_num_step_conf": 0.3828125,
"calib/ece": 0.7616666666666666,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.0546875,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": -0.12250000000000016,
"calib/mean_conf": 0.9227777777777777,
"calib/mu_c": 0.8275,
"calib/mu_w": 0.9500000000000002,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.1328125,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.731111111111111,
"calib/std_conf": 0.11882786595910515,
"calib/step_conf_rate": 0.08984375,
"calib/step_q_c": 0.6353333333333334,
"calib/step_q_c_n": 15.0,
"calib/step_q_gap": -0.11840160642570263,
"calib/step_q_w": 0.753734939759036,
"calib/step_q_w_n": 83.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.10546875,
"completions/max_length": 3040.0,
"completions/max_terminated_length": 3040.0,
"completions/mean_length": 671.4140625,
"completions/mean_terminated_length": 750.576416015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.017615454271435738,
"learning_rate": 5.000000000000001e-07,
"loss": 0.1193,
"num_tokens": 594010.0,
"reward": 0.04443163797259331,
"reward_std": 0.08920705318450928,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.014644531533122063,
"rewards/format_reward_step": 0.0546875,
"step": 2
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.3238900985036577,
"aux_distill/mean_u": 0.29028764932618667,
"aux_distill/n_active_tok": 18.857142857142858,
"calib/answer_extract_rate": 0.0625,
"calib/auroc": 0.6666666666666667,
"calib/avg_num_step_conf": 0.12890625,
"calib/ece": 0.7514285714285714,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.7142857142857143,
"calib/gap": 0.08833333333333326,
"calib/mean_conf": 0.8942857142857144,
"calib/mu_c": 0.97,
"calib/mu_w": 0.8816666666666667,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.02734375,
"calib/pce": 0.7514285714285714,
"calib/std_conf": 0.16378183597676013,
"calib/step_conf_rate": 0.02734375,
"calib/step_q_c": 0.87875,
"calib/step_q_c_n": 8.0,
"calib/step_q_gap": 0.03194999999999992,
"calib/step_q_w": 0.8468000000000001,
"calib/step_q_w_n": 25.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 2943.0,
"completions/max_terminated_length": 2943.0,
"completions/mean_length": 702.1953125,
"completions/mean_terminated_length": 802.5089721679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0032,
"grad_norm": 0.008602521382272243,
"learning_rate": 7.5e-07,
"loss": 0.0409,
"num_tokens": 902836.0,
"reward": 0.017878906801342964,
"reward_std": 0.04549115151166916,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.008414062671363354,
"rewards/format_reward_step": 0.0234375,
"step": 3
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.3307591259479523,
"aux_distill/mean_u": 0.2824160253664306,
"aux_distill/n_active_tok": 20.4,
"calib/answer_extract_rate": 0.08203125,
"calib/auroc": 0.09090909090909094,
"calib/avg_num_step_conf": 0.19921875,
"calib/ece": 0.8541666666666666,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.8333333333333334,
"calib/gap": -0.040909090909091006,
"calib/mean_conf": 0.9375,
"calib/mu_c": 0.9,
"calib/mu_w": 0.940909090909091,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.04296875,
"calib/pce": 0.8541666666666666,
"calib/std_conf": 0.06546309392830946,
"calib/step_conf_rate": 0.04296875,
"calib/step_q_c": 0.8671428571428571,
"calib/step_q_c_n": 7.0,
"calib/step_q_gap": 0.10123376623376612,
"calib/step_q_w": 0.765909090909091,
"calib/step_q_w_n": 44.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.109375,
"completions/max_length": 3034.0,
"completions/max_terminated_length": 3034.0,
"completions/mean_length": 749.94140625,
"completions/mean_terminated_length": 842.0394897460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.009631999768316746,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.043,
"num_tokens": 1224797.0,
"reward": 0.025254102423787117,
"reward_std": 0.055353712290525436,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.007539453450590372,
"rewards/format_reward_step": 0.03515625,
"step": 4
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.2394245167573292,
"aux_distill/mean_u": 0.3190632179208172,
"aux_distill/n_active_tok": 20.0,
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.7857142857142857,
"calib/avg_num_step_conf": 0.23828125,
"calib/ece": 0.85,
"calib/final_conf_rate": 0.03125,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.017142857142857015,
"calib/mean_conf": 0.975,
"calib/mu_c": 0.99,
"calib/mu_w": 0.972857142857143,
"calib/nonempty_final_conf_rate": 0.03125,
"calib/nonempty_reasoning_rate": 0.08203125,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.85,
"calib/std_conf": 0.016583123951777013,
"calib/step_conf_rate": 0.05078125,
"calib/step_q_c": 0.8677777777777779,
"calib/step_q_c_n": 9.0,
"calib/step_q_gap": 0.054508547008547126,
"calib/step_q_w": 0.8132692307692307,
"calib/step_q_w_n": 52.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.10546875,
"completions/max_length": 2903.0,
"completions/max_terminated_length": 2903.0,
"completions/mean_length": 791.6171875,
"completions/mean_terminated_length": 884.9519653320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.010286133736371994,
"learning_rate": 1.25e-06,
"loss": 0.0523,
"num_tokens": 1557947.0,
"reward": 0.020259374752640724,
"reward_std": 0.04438989982008934,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.005362499970942736,
"rewards/format_reward_step": 0.03125,
"step": 5
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.355362464984258,
"aux_distill/mean_u": 0.2270808669147952,
"aux_distill/n_active_tok": 21.333333333333332,
"calib/answer_extract_rate": 0.1015625,
"calib/auroc": 0.6590909090909092,
"calib/avg_num_step_conf": 0.25,
"calib/ece": 0.796923076923077,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.8461538461538461,
"calib/gap": 0.02863636363636357,
"calib/mean_conf": 0.9507692307692307,
"calib/mu_c": 0.975,
"calib/mu_w": 0.9463636363636364,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.109375,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.796923076923077,
"calib/std_conf": 0.04322775790223728,
"calib/step_conf_rate": 0.05859375,
"calib/step_q_c": 0.8838461538461538,
"calib/step_q_c_n": 13.0,
"calib/step_q_gap": 0.03992458521870268,
"calib/step_q_w": 0.8439215686274512,
"calib/step_q_w_n": 51.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3071.0,
"completions/max_terminated_length": 3071.0,
"completions/mean_length": 666.00390625,
"completions/mean_terminated_length": 722.4449462890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.014194191433489323,
"learning_rate": 1.5e-06,
"loss": 0.0803,
"num_tokens": 1858204.0,
"reward": 0.02712968736886978,
"reward_std": 0.049767978489398956,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.011290624737739563,
"rewards/format_reward_step": 0.03515625,
"step": 6
},
{
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/loss": 1.3300063119215124,
"aux_distill/mean_u": 0.376707364389986,
"aux_distill/n_active_tok": 27.764705882352942,
"calib/answer_extract_rate": 0.109375,
"calib/auroc": 0.625,
"calib/avg_num_step_conf": 0.4609375,
"calib/ece": 0.7336842105263157,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.0625,
"calib/frac_conf_gt_0.9": 0.7368421052631579,
"calib/gap": 0.08125000000000004,
"calib/mean_conf": 0.8915789473684211,
"calib/mu_c": 0.96,
"calib/mu_w": 0.8787499999999999,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.12109375,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.7336842105263157,
"calib/std_conf": 0.14808472531823183,
"calib/step_conf_rate": 0.08984375,
"calib/step_q_c": 0.8224999999999998,
"calib/step_q_c_n": 20.0,
"calib/step_q_gap": 0.03698979591836704,
"calib/step_q_w": 0.7855102040816327,
"calib/step_q_w_n": 98.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3059.0,
"completions/max_terminated_length": 3059.0,
"completions/mean_length": 744.8203125,
"completions/mean_terminated_length": 821.8706665039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.016747502610087395,
"learning_rate": 1.75e-06,
"loss": 0.1105,
"num_tokens": 2180110.0,
"reward": 0.04752578213810921,
"reward_std": 0.10274682939052582,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.020832812413573265,
"rewards/format_reward_step": 0.0625,
"step": 7
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.3433888256549835,
"aux_distill/mean_u": 0.26888269331964815,
"aux_distill/n_active_tok": 23.666666666666668,
"calib/answer_extract_rate": 0.0703125,
"calib/auroc": 0.7708333333333333,
"calib/avg_num_step_conf": 0.27734375,
"calib/ece": 0.357,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": 0.036666666666666625,
"calib/mean_conf": 0.9570000000000001,
"calib/mu_c": 0.9716666666666667,
"calib/mu_w": 0.935,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.08984375,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.357,
"calib/std_conf": 0.03606937759374285,
"calib/step_conf_rate": 0.05859375,
"calib/step_q_c": 0.8213636363636365,
"calib/step_q_c_n": 22.0,
"calib/step_q_gap": 0.1434044526901671,
"calib/step_q_w": 0.6779591836734694,
"calib/step_q_w_n": 49.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11328125,
"completions/max_length": 3013.0,
"completions/max_terminated_length": 3013.0,
"completions/mean_length": 616.0390625,
"completions/mean_terminated_length": 694.7400512695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.01233338937163353,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.079,
"num_tokens": 2468136.0,
"reward": 0.03779961168766022,
"reward_std": 0.09547053277492523,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.020911719650030136,
"rewards/format_reward_step": 0.03125,
"step": 8
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.2926430225372314,
"aux_distill/mean_u": 0.2499272264449756,
"aux_distill/n_active_tok": 17.6,
"calib/answer_extract_rate": 0.0703125,
"calib/auroc": 0.7222222222222222,
"calib/avg_num_step_conf": 0.171875,
"calib/ece": 0.663,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.6,
"calib/gap": 0.24111111111111116,
"calib/mean_conf": 0.763,
"calib/mu_c": 0.98,
"calib/mu_w": 0.7388888888888888,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.07421875,
"calib/nonempty_step_conf_rate": 0.04296875,
"calib/pce": 0.663,
"calib/std_conf": 0.31752322749682427,
"calib/step_conf_rate": 0.04296875,
"calib/step_q_c": 0.8866666666666667,
"calib/step_q_c_n": 3.0,
"calib/step_q_gap": 0.0964227642276424,
"calib/step_q_w": 0.7902439024390243,
"calib/step_q_w_n": 41.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 3040.0,
"completions/max_terminated_length": 3040.0,
"completions/mean_length": 700.2109375,
"completions/mean_terminated_length": 800.2410888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0096,
"grad_norm": 0.010681779123842716,
"learning_rate": 2.25e-06,
"loss": 0.0393,
"num_tokens": 2778734.0,
"reward": 0.025644725188612938,
"reward_std": 0.043472521007061005,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.0122269531711936,
"rewards/format_reward_step": 0.03515625,
"step": 9
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.3319018862464211,
"aux_distill/mean_u": 0.20504655473969555,
"aux_distill/n_active_tok": 17.454545454545453,
"calib/answer_extract_rate": 0.08984375,
"calib/auroc": 0.8076923076923077,
"calib/avg_num_step_conf": 0.19140625,
"calib/ece": 0.7885714285714284,
"calib/final_conf_rate": 0.0546875,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.8571428571428571,
"calib/gap": 0.11846153846153828,
"calib/mean_conf": 0.8600000000000001,
"calib/mu_c": 0.97,
"calib/mu_w": 0.8515384615384617,
"calib/nonempty_final_conf_rate": 0.0546875,
"calib/nonempty_reasoning_rate": 0.09765625,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.7885714285714284,
"calib/std_conf": 0.24848110017234135,
"calib/step_conf_rate": 0.05078125,
"calib/step_q_c": 0.754,
"calib/step_q_c_n": 5.0,
"calib/step_q_gap": -0.06918181818181812,
"calib/step_q_w": 0.8231818181818181,
"calib/step_q_w_n": 44.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 648.3046875,
"completions/mean_terminated_length": 697.336181640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.013561662286520004,
"learning_rate": 2.5e-06,
"loss": 0.0141,
"num_tokens": 3075308.0,
"reward": 0.02494199201464653,
"reward_std": 0.0662628710269928,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.006915234960615635,
"rewards/format_reward_step": 0.03515625,
"step": 10
},
{
"aux_distill/lambda": 0.1,
"aux_distill/loss": 1.142171167410337,
"aux_distill/mean_u": 0.24344688370614387,
"aux_distill/n_active_tok": 18.76923076923077,
"calib/answer_extract_rate": 0.12890625,
"calib/auroc": 0.4875,
"calib/avg_num_step_conf": 0.2421875,
"calib/ece": 0.662857142857143,
"calib/final_conf_rate": 0.0546875,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.7857142857142857,
"calib/gap": 0.012500000000000067,
"calib/mean_conf": 0.9485714285714286,
"calib/mu_c": 0.9575,
"calib/mu_w": 0.945,
"calib/nonempty_final_conf_rate": 0.0546875,
"calib/nonempty_reasoning_rate": 0.15625,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.662857142857143,
"calib/std_conf": 0.06322941707523065,
"calib/step_conf_rate": 0.0703125,
"calib/step_q_c": 0.9633333333333333,
"calib/step_q_c_n": 3.0,
"calib/step_q_gap": 0.2759195402298851,
"calib/step_q_w": 0.6874137931034482,
"calib/step_q_w_n": 58.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3046.0,
"completions/max_terminated_length": 3046.0,
"completions/mean_length": 745.5625,
"completions/mean_terminated_length": 808.7457885742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.011814948171377182,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0691,
"num_tokens": 3394460.0,
"reward": 0.034773632884025574,
"reward_std": 0.07486072927713394,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.010953515768051147,
"rewards/format_reward_step": 0.03515625,
"step": 11
},
{
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/loss": 1.3093594085602533,
"aux_distill/mean_u": 0.370681781070378,
"aux_distill/n_active_tok": 23.428571428571427,
"calib/answer_extract_rate": 0.1796875,
"calib/auroc": 0.5755208333333333,
"calib/avg_num_step_conf": 0.49609375,
"calib/ece": 0.6875,
"calib/final_conf_rate": 0.125,
"calib/format_rate": 0.0859375,
"calib/frac_conf_gt_0.9": 0.8125,
"calib/gap": -0.015833333333333255,
"calib/mean_conf": 0.893125,
"calib/mu_c": 0.88125,
"calib/mu_w": 0.8970833333333332,
"calib/nonempty_final_conf_rate": 0.125,
"calib/nonempty_reasoning_rate": 0.203125,
"calib/nonempty_step_conf_rate": 0.1171875,
"calib/pce": 0.6653125,
"calib/std_conf": 0.17207261366934598,
"calib/step_conf_rate": 0.1171875,
"calib/step_q_c": 0.7972727272727274,
"calib/step_q_c_n": 33.0,
"calib/step_q_gap": 0.0195067698259187,
"calib/step_q_w": 0.7777659574468087,
"calib/step_q_w_n": 94.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06640625,
"completions/max_length": 3010.0,
"completions/max_terminated_length": 3010.0,
"completions/mean_length": 742.7421875,
"completions/mean_terminated_length": 795.5731811523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0128,
"grad_norm": 0.01771925762295723,
"learning_rate": 3e-06,
"loss": 0.1562,
"num_tokens": 3712586.0,
"reward": 0.07837206870317459,
"reward_std": 0.16047851741313934,
"rewards/accuracy_reward_step": 0.03515625,
"rewards/final_brier_reward_step": 0.03565039113163948,
"rewards/format_reward_step": 0.0859375,
"step": 12
},
{
"aux_distill/lambda": 0.09999999999999999,
"aux_distill/loss": 1.1611148168643315,
"aux_distill/mean_u": 0.23878970391290485,
"aux_distill/n_active_tok": 26.333333333333332,
"calib/answer_extract_rate": 0.09765625,
"calib/auroc": 0.5727272727272728,
"calib/avg_num_step_conf": 0.32421875,
"calib/ece": 0.574375,
"calib/final_conf_rate": 0.0625,
"calib/format_rate": 0.046875,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": 0.10636363636363633,
"calib/mean_conf": 0.8868750000000001,
"calib/mu_c": 0.96,
"calib/mu_w": 0.8536363636363636,
"calib/nonempty_final_conf_rate": 0.0625,
"calib/nonempty_reasoning_rate": 0.1171875,
"calib/nonempty_step_conf_rate": 0.07421875,
"calib/pce": 0.574375,
"calib/std_conf": 0.16559433678420285,
"calib/step_conf_rate": 0.07421875,
"calib/step_q_c": 0.6991176470588236,
"calib/step_q_c_n": 17.0,
"calib/step_q_gap": -0.08073083778966128,
"calib/step_q_w": 0.7798484848484849,
"calib/step_q_w_n": 66.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2675.0,
"completions/max_terminated_length": 2675.0,
"completions/mean_length": 697.2421875,
"completions/mean_terminated_length": 753.1392211914062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.00980466604232788,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0487,
"num_tokens": 4019480.0,
"reward": 0.0430777333676815,
"reward_std": 0.10306745767593384,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.019749218598008156,
"rewards/format_reward_step": 0.046875,
"step": 13
},
{
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/loss": 1.1014714141686757,
"aux_distill/mean_u": 0.2636372189654983,
"aux_distill/n_active_tok": 21.555555555555557,
"calib/answer_extract_rate": 0.1171875,
"calib/auroc": 0.6818181818181819,
"calib/avg_num_step_conf": 0.3828125,
"calib/ece": 0.5940769230769232,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.23076923076923078,
"calib/gap": 0.0892272727272726,
"calib/mean_conf": 0.671,
"calib/mu_c": 0.7464999999999999,
"calib/mu_w": 0.6572727272727273,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.15234375,
"calib/nonempty_step_conf_rate": 0.09765625,
"calib/pce": 0.5556153846153846,
"calib/std_conf": 0.2858477270802115,
"calib/step_conf_rate": 0.09765625,
"calib/step_q_c": 0.554142857142857,
"calib/step_q_c_n": 7.0,
"calib/step_q_gap": 0.13351538461538448,
"calib/step_q_w": 0.42062747252747257,
"calib/step_q_w_n": 91.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3069.0,
"completions/max_terminated_length": 3069.0,
"completions/mean_length": 786.0078125,
"completions/mean_terminated_length": 856.2467651367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.010665432550013065,
"learning_rate": 3.5e-06,
"loss": 0.1084,
"num_tokens": 4349906.0,
"reward": 0.03924267739057541,
"reward_std": 0.08956538140773773,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.023797854781150818,
"rewards/format_reward_step": 0.04296875,
"step": 14
},
{
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/loss": 1.1782517830530803,
"aux_distill/mean_u": 0.37329411304752624,
"aux_distill/n_active_tok": 25.5,
"calib/answer_extract_rate": 0.171875,
"calib/auroc": 0.7395833333333333,
"calib/avg_num_step_conf": 0.62109375,
"calib/ece": 0.7945000000000002,
"calib/final_conf_rate": 0.1015625,
"calib/format_rate": 0.0625,
"calib/frac_conf_gt_0.9": 0.6538461538461539,
"calib/gap": 0.10679166666666673,
"calib/mean_conf": 0.8714230769230771,
"calib/mu_c": 0.97,
"calib/mu_w": 0.8632083333333332,
"calib/nonempty_final_conf_rate": 0.1015625,
"calib/nonempty_reasoning_rate": 0.21875,
"calib/nonempty_step_conf_rate": 0.140625,
"calib/pce": 0.7945000000000002,
"calib/std_conf": 0.17340663040223006,
"calib/step_conf_rate": 0.140625,
"calib/step_q_w": 0.718860759493671,
"calib/step_q_w_n": 158.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2988.0,
"completions/max_terminated_length": 2988.0,
"completions/mean_length": 665.02734375,
"completions/mean_terminated_length": 718.3417358398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.018191225826740265,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.1084,
"num_tokens": 4651841.0,
"reward": 0.04167646914720535,
"reward_std": 0.09166643768548965,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.013040433637797832,
"rewards/format_reward_step": 0.0625,
"step": 15
},
{
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/loss": 1.0293893954333138,
"aux_distill/mean_u": 0.4007917690219686,
"aux_distill/n_active_tok": 22.352941176470587,
"calib/answer_extract_rate": 0.12109375,
"calib/auroc": 0.1911764705882353,
"calib/avg_num_step_conf": 0.375,
"calib/ece": 0.7378947368421053,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.05859375,
"calib/frac_conf_gt_0.9": 0.5789473684210527,
"calib/gap": -0.12735294117647045,
"calib/mean_conf": 0.7989473684210526,
"calib/mu_c": 0.685,
"calib/mu_w": 0.8123529411764705,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.14453125,
"calib/nonempty_step_conf_rate": 0.09375,
"calib/pce": 0.7157894736842105,
"calib/std_conf": 0.2579044036856355,
"calib/step_conf_rate": 0.09375,
"calib/step_q_c": 0.5181818181818183,
"calib/step_q_c_n": 11.0,
"calib/step_q_gap": -0.0858181818181818,
"calib/step_q_w": 0.6040000000000001,
"calib/step_q_w_n": 85.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1328125,
"completions/max_length": 2929.0,
"completions/max_terminated_length": 2929.0,
"completions/mean_length": 628.9453125,
"completions/mean_terminated_length": 725.270263671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.011694777756929398,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0626,
"num_tokens": 4945507.0,
"reward": 0.044773828238248825,
"reward_std": 0.10467442870140076,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.01923515647649765,
"rewards/format_reward_step": 0.05859375,
"step": 16
},
{
"aux_distill/lambda": 0.10000000000000003,
"aux_distill/loss": 1.0415730786820252,
"aux_distill/mean_u": 0.2799439116063525,
"aux_distill/n_active_tok": 28.0,
"calib/answer_extract_rate": 0.15234375,
"calib/auroc": 0.6759259259259259,
"calib/avg_num_step_conf": 0.65625,
"calib/ece": 0.45083333333333336,
"calib/final_conf_rate": 0.09375,
"calib/format_rate": 0.07421875,
"calib/frac_conf_gt_0.9": 0.2916666666666667,
"calib/gap": 0.16666666666666685,
"calib/mean_conf": 0.6516666666666667,
"calib/mu_c": 0.7766666666666667,
"calib/mu_w": 0.6099999999999999,
"calib/nonempty_final_conf_rate": 0.09375,
"calib/nonempty_reasoning_rate": 0.203125,
"calib/nonempty_step_conf_rate": 0.14453125,
"calib/pce": 0.42625,
"calib/std_conf": 0.3062497165531568,
"calib/step_conf_rate": 0.14453125,
"calib/step_q_c": 0.38839629629629624,
"calib/step_q_c_n": 27.0,
"calib/step_q_gap": -0.0620292356185973,
"calib/step_q_w": 0.45042553191489354,
"calib/step_q_w_n": 141.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 688.16015625,
"completions/mean_terminated_length": 740.2059326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.011651230975985527,
"learning_rate": 4.25e-06,
"loss": 0.1347,
"num_tokens": 5249012.0,
"reward": 0.07471387088298798,
"reward_std": 0.16528142988681793,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.051771484315395355,
"rewards/format_reward_step": 0.07421875,
"step": 17
},
{
"aux_distill/lambda": 0.10000000000000002,
"aux_distill/loss": 1.0446974494877983,
"aux_distill/mean_u": 0.33307486253661095,
"aux_distill/n_active_tok": 23.529411764705884,
"calib/answer_extract_rate": 0.1171875,
"calib/auroc": 0.6176470588235294,
"calib/avg_num_step_conf": 0.390625,
"calib/ece": 0.5987373737373737,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.046875,
"calib/frac_conf_gt_0.9": 0.3333333333333333,
"calib/gap": 0.2601604278074868,
"calib/mean_conf": 0.6542929292929293,
"calib/mu_c": 0.9,
"calib/mu_w": 0.6398395721925132,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.15234375,
"calib/nonempty_step_conf_rate": 0.09765625,
"calib/pce": 0.5987373737373737,
"calib/std_conf": 0.3442238585416577,
"calib/step_conf_rate": 0.09765625,
"calib/step_q_c": 0.6000000000000001,
"calib/step_q_c_n": 8.0,
"calib/step_q_gap": 0.008249456521739251,
"calib/step_q_w": 0.5917505434782608,
"calib/step_q_w_n": 92.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06640625,
"completions/max_length": 2962.0,
"completions/max_terminated_length": 2962.0,
"completions/mean_length": 748.33203125,
"completions/mean_terminated_length": 801.5606689453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0192,
"grad_norm": 0.007760821841657162,
"learning_rate": 4.5e-06,
"loss": 0.0604,
"num_tokens": 5575113.0,
"reward": 0.042026352137327194,
"reward_std": 0.0855361744761467,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.02936520427465439,
"rewards/format_reward_step": 0.046875,
"step": 18
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 1.0255909500450924,
"aux_distill/mean_u": 0.387873029046073,
"aux_distill/n_active_tok": 48.689655172413794,
"calib/answer_extract_rate": 0.31640625,
"calib/auroc": 0.3815789473684211,
"calib/avg_num_step_conf": 1.390625,
"calib/ece": 0.39595238095238094,
"calib/final_conf_rate": 0.1640625,
"calib/format_rate": 0.15234375,
"calib/frac_conf_gt_0.9": 0.14285714285714285,
"calib/gap": -0.1797368421052632,
"calib/mean_conf": 0.47261904761904766,
"calib/mu_c": 0.31000000000000005,
"calib/mu_w": 0.48973684210526325,
"calib/nonempty_final_conf_rate": 0.1640625,
"calib/nonempty_reasoning_rate": 0.41796875,
"calib/nonempty_step_conf_rate": 0.32421875,
"calib/pce": 0.3866666666666666,
"calib/std_conf": 0.34127254520892636,
"calib/step_conf_rate": 0.32421875,
"calib/step_q_c": 0.36454545454545456,
"calib/step_q_c_n": 33.0,
"calib/step_q_gap": 0.03782997466929355,
"calib/step_q_w": 0.326715479876161,
"calib/step_q_w_n": 323.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04296875,
"completions/max_length": 2840.0,
"completions/max_terminated_length": 2840.0,
"completions/mean_length": 646.0625,
"completions/mean_terminated_length": 675.0693359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.01326060201972723,
"learning_rate": 4.75e-06,
"loss": 0.1531,
"num_tokens": 5869073.0,
"reward": 0.14051249623298645,
"reward_std": 0.28107649087905884,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.0974312573671341,
"rewards/format_reward_step": 0.15234375,
"step": 19
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 1.0279449429363012,
"aux_distill/mean_u": 0.3457623021266588,
"aux_distill/n_active_tok": 70.375,
"calib/answer_extract_rate": 0.4140625,
"calib/auroc": 0.4934065934065934,
"calib/avg_num_step_conf": 2.20703125,
"calib/ece": 0.3231595511392405,
"calib/final_conf_rate": 0.30859375,
"calib/format_rate": 0.2421875,
"calib/frac_conf_gt_0.9": 0.1518987341772152,
"calib/gap": -0.02933201806593405,
"calib/mean_conf": 0.35369108177215186,
"calib/mu_c": 0.3295571428571428,
"calib/mu_w": 0.3588891609230769,
"calib/nonempty_final_conf_rate": 0.30859375,
"calib/nonempty_reasoning_rate": 0.578125,
"calib/nonempty_step_conf_rate": 0.47265625,
"calib/pce": 0.24981772151898735,
"calib/std_conf": 0.3633414831600973,
"calib/step_conf_rate": 0.47265625,
"calib/step_q_c": 0.27234264705882355,
"calib/step_q_c_n": 68.0,
"calib/step_q_gap": -0.02546191469868564,
"calib/step_q_w": 0.2978045617575092,
"calib/step_q_w_n": 497.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 3032.0,
"completions/max_terminated_length": 3032.0,
"completions/mean_length": 606.62109375,
"completions/mean_terminated_length": 644.3776245117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.013194161467254162,
"learning_rate": 5e-06,
"loss": 0.2451,
"num_tokens": 6153048.0,
"reward": 0.23589837551116943,
"reward_std": 0.35146909952163696,
"rewards/accuracy_reward_step": 0.06640625,
"rewards/final_brier_reward_step": 0.16320298612117767,
"rewards/format_reward_step": 0.2421875,
"step": 20
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9818312600255013,
"aux_distill/mean_u": 0.3322417602864148,
"aux_distill/n_active_tok": 90.125,
"calib/answer_extract_rate": 0.51953125,
"calib/auroc": 0.42969924812030086,
"calib/avg_num_step_conf": 2.8203125,
"calib/ece": 0.3284403486238532,
"calib/final_conf_rate": 0.42578125,
"calib/format_rate": 0.36328125,
"calib/frac_conf_gt_0.9": 0.12844036697247707,
"calib/gap": -0.10269009624060149,
"calib/mean_conf": 0.3572862568807339,
"calib/mu_c": 0.26778571428571424,
"calib/mu_w": 0.37047581052631573,
"calib/nonempty_final_conf_rate": 0.42578125,
"calib/nonempty_reasoning_rate": 0.6484375,
"calib/nonempty_step_conf_rate": 0.578125,
"calib/pce": 0.27864311926605506,
"calib/std_conf": 0.3425272573392338,
"calib/step_conf_rate": 0.578125,
"calib/step_q_c": 0.3607142857142857,
"calib/step_q_c_n": 56.0,
"calib/step_q_gap": 0.09586743886743887,
"calib/step_q_w": 0.26484684684684684,
"calib/step_q_w_n": 666.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0546875,
"completions/max_length": 3040.0,
"completions/max_terminated_length": 3040.0,
"completions/mean_length": 583.8125,
"completions/mean_terminated_length": 617.5867309570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 18.0,
"epoch": 0.0224,
"grad_norm": 0.01277672778815031,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.2213,
"num_tokens": 6429272.0,
"reward": 0.33688199520111084,
"reward_std": 0.37365442514419556,
"rewards/accuracy_reward_step": 0.05859375,
"rewards/final_brier_reward_step": 0.2518889605998993,
"rewards/format_reward_step": 0.36328125,
"step": 21
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9814799521118402,
"aux_distill/mean_u": 0.3856371479271701,
"aux_distill/n_active_tok": 107.125,
"calib/answer_extract_rate": 0.5703125,
"calib/auroc": 0.5931578947368421,
"calib/avg_num_step_conf": 3.39453125,
"calib/ece": 0.2696144017687075,
"calib/final_conf_rate": 0.41015625,
"calib/format_rate": 0.359375,
"calib/frac_conf_gt_0.9": 0.11428571428571428,
"calib/gap": 0.1432568040601503,
"calib/mean_conf": 0.3313867010884354,
"calib/mu_c": 0.46099999999999997,
"calib/mu_w": 0.3177431959398497,
"calib/nonempty_final_conf_rate": 0.41015625,
"calib/nonempty_reasoning_rate": 0.71875,
"calib/nonempty_step_conf_rate": 0.62890625,
"calib/pce": 0.2528815038095238,
"calib/std_conf": 0.3426169310066443,
"calib/step_conf_rate": 0.62890625,
"calib/step_q_c": 0.21620727272727272,
"calib/step_q_c_n": 55.0,
"calib/step_q_gap": -0.033706065082368813,
"calib/step_q_w": 0.24991333780964153,
"calib/step_q_w_n": 809.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03515625,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 554.1015625,
"completions/mean_terminated_length": 574.29150390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.01487016212195158,
"learning_rate": 4.944444444444445e-06,
"loss": 0.1965,
"num_tokens": 6696746.0,
"reward": 0.3352200388908386,
"reward_std": 0.42807674407958984,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.26809632778167725,
"rewards/format_reward_step": 0.359375,
"step": 22
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9744270667433739,
"aux_distill/mean_u": 0.31499481351199665,
"aux_distill/n_active_tok": 121.125,
"calib/answer_extract_rate": 0.62109375,
"calib/auroc": 0.4381720430107527,
"calib/avg_num_step_conf": 3.79296875,
"calib/ece": 0.31510691602159824,
"calib/final_conf_rate": 0.53125,
"calib/format_rate": 0.421875,
"calib/frac_conf_gt_0.9": 0.11764705882352941,
"calib/gap": -0.04729891960110766,
"calib/mean_conf": 0.33540048551865703,
"calib/mu_c": 0.292275,
"calib/mu_w": 0.33957391960110767,
"calib/nonempty_final_conf_rate": 0.53125,
"calib/nonempty_reasoning_rate": 0.828125,
"calib/nonempty_step_conf_rate": 0.74609375,
"calib/pce": 0.2811360537113041,
"calib/std_conf": 0.34317791131441383,
"calib/step_conf_rate": 0.74609375,
"calib/step_q_c": 0.3123102040816327,
"calib/step_q_c_n": 49.0,
"calib/step_q_gap": 0.05884855061162214,
"calib/step_q_w": 0.25346165347001054,
"calib/step_q_w_n": 922.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2973.0,
"completions/max_terminated_length": 2973.0,
"completions/mean_length": 566.89453125,
"completions/mean_terminated_length": 580.5,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.011434352025389671,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.2523,
"num_tokens": 6969615.0,
"reward": 0.39768069982528687,
"reward_std": 0.47356581687927246,
"rewards/accuracy_reward_step": 0.05859375,
"rewards/final_brier_reward_step": 0.31489264965057373,
"rewards/format_reward_step": 0.421875,
"step": 23
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9627590533345938,
"aux_distill/mean_u": 0.3975181422269218,
"aux_distill/n_active_tok": 141.125,
"calib/answer_extract_rate": 0.6796875,
"calib/auroc": 0.39983022071307295,
"calib/avg_num_step_conf": 4.6875,
"calib/ece": 0.27329715482517486,
"calib/final_conf_rate": 0.55859375,
"calib/format_rate": 0.44921875,
"calib/frac_conf_gt_0.9": 0.06993006993006994,
"calib/gap": -0.12481586148556881,
"calib/mean_conf": 0.3192845674125875,
"calib/mu_c": 0.21105263157894735,
"calib/mu_w": 0.33586849306451616,
"calib/nonempty_final_conf_rate": 0.55859375,
"calib/nonempty_reasoning_rate": 0.890625,
"calib/nonempty_step_conf_rate": 0.7890625,
"calib/pce": 0.22985729468531468,
"calib/std_conf": 0.31422027889339454,
"calib/step_conf_rate": 0.7890625,
"calib/step_q_c": 0.2549627617364461,
"calib/step_q_c_n": 115.0,
"calib/step_q_gap": 0.005681280665347288,
"calib/step_q_w": 0.24928148107109882,
"calib/step_q_w_n": 1083.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 3022.0,
"completions/max_terminated_length": 3022.0,
"completions/mean_length": 565.36328125,
"completions/mean_terminated_length": 576.62548828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0256,
"grad_norm": 0.012067703530192375,
"learning_rate": 4.888888888888889e-06,
"loss": 0.3338,
"num_tokens": 7242668.0,
"reward": 0.43442392349243164,
"reward_std": 0.43748435378074646,
"rewards/accuracy_reward_step": 0.09375,
"rewards/final_brier_reward_step": 0.3258790969848633,
"rewards/format_reward_step": 0.44921875,
"step": 24
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9440481103956699,
"aux_distill/mean_u": 0.35665699968748665,
"aux_distill/n_active_tok": 139.125,
"calib/answer_extract_rate": 0.77734375,
"calib/auroc": 0.44574780058651026,
"calib/avg_num_step_conf": 4.61328125,
"calib/ece": 0.29719062486867937,
"calib/final_conf_rate": 0.69140625,
"calib/format_rate": 0.6015625,
"calib/frac_conf_gt_0.9": 0.11299435028248588,
"calib/gap": -0.059031992152093093,
"calib/mean_conf": 0.34487650057489405,
"calib/mu_c": 0.2931818181818182,
"calib/mu_w": 0.3522138103339113,
"calib/nonempty_final_conf_rate": 0.69140625,
"calib/nonempty_reasoning_rate": 0.9296875,
"calib/nonempty_step_conf_rate": 0.875,
"calib/pce": 0.2588866700664195,
"calib/std_conf": 0.3256446003305085,
"calib/step_conf_rate": 0.875,
"calib/step_q_c": 0.24164233576642338,
"calib/step_q_c_n": 137.0,
"calib/step_q_gap": -0.029769908720633148,
"calib/step_q_w": 0.27141224448705653,
"calib/step_q_w_n": 1043.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2833.0,
"completions/max_terminated_length": 2833.0,
"completions/mean_length": 507.72265625,
"completions/mean_terminated_length": 515.7817993164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 22.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.011941850185394287,
"learning_rate": 4.861111111111111e-06,
"loss": 0.242,
"num_tokens": 7499677.0,
"reward": 0.5643470883369446,
"reward_std": 0.4552688002586365,
"rewards/accuracy_reward_step": 0.09765625,
"rewards/final_brier_reward_step": 0.429475337266922,
"rewards/format_reward_step": 0.6015625,
"step": 25
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9647899996489286,
"aux_distill/mean_u": 0.43523350339639616,
"aux_distill/n_active_tok": 147.0,
"calib/answer_extract_rate": 0.77734375,
"calib/auroc": 0.47863924050632906,
"calib/avg_num_step_conf": 4.59375,
"calib/ece": 0.28037136764367815,
"calib/final_conf_rate": 0.6796875,
"calib/format_rate": 0.58984375,
"calib/frac_conf_gt_0.9": 0.06896551724137931,
"calib/gap": -0.0215346074050633,
"calib/mean_conf": 0.3493794136206897,
"calib/mu_c": 0.329825,
"calib/mu_w": 0.3513596074050633,
"calib/nonempty_final_conf_rate": 0.6796875,
"calib/nonempty_reasoning_rate": 0.9453125,
"calib/nonempty_step_conf_rate": 0.87890625,
"calib/pce": 0.268898379137931,
"calib/std_conf": 0.3015161495424351,
"calib/step_conf_rate": 0.87890625,
"calib/step_q_c": 0.2793808823529412,
"calib/step_q_c_n": 68.0,
"calib/step_q_gap": -0.004022890408791668,
"calib/step_q_w": 0.28340377276173284,
"calib/step_q_w_n": 1108.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2232.0,
"completions/max_terminated_length": 2232.0,
"completions/mean_length": 458.015625,
"completions/mean_terminated_length": 465.2857360839844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.012656077742576599,
"learning_rate": 4.833333333333333e-06,
"loss": 0.1847,
"num_tokens": 7745977.0,
"reward": 0.5486533641815186,
"reward_std": 0.44643837213516235,
"rewards/accuracy_reward_step": 0.0625,
"rewards/final_brier_reward_step": 0.4449630379676819,
"rewards/format_reward_step": 0.58984375,
"step": 26
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9289473351091146,
"aux_distill/mean_u": 0.37751402623273483,
"aux_distill/n_active_tok": 146.75,
"calib/answer_extract_rate": 0.859375,
"calib/auroc": 0.4695383150880533,
"calib/avg_num_step_conf": 4.5859375,
"calib/ece": 0.32406237623762374,
"calib/final_conf_rate": 0.7890625,
"calib/format_rate": 0.71875,
"calib/frac_conf_gt_0.9": 0.054455445544554455,
"calib/gap": 0.004386673012851006,
"calib/mean_conf": 0.3522158415841585,
"calib/mu_c": 0.3563636363636363,
"calib/mu_w": 0.3519769633507853,
"calib/nonempty_final_conf_rate": 0.7890625,
"calib/nonempty_reasoning_rate": 0.96875,
"calib/nonempty_step_conf_rate": 0.93359375,
"calib/pce": 0.31091138613861385,
"calib/std_conf": 0.2945769718143318,
"calib/step_conf_rate": 0.93359375,
"calib/step_q_c": 0.25133333333333335,
"calib/step_q_c_n": 60.0,
"calib/step_q_gap": -0.06902260023937762,
"calib/step_q_w": 0.32035593357271097,
"calib/step_q_w_n": 1114.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2577.0,
"completions/max_terminated_length": 2577.0,
"completions/mean_length": 423.5546875,
"completions/mean_terminated_length": 425.2156982421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.0288,
"grad_norm": 0.010853436775505543,
"learning_rate": 4.805555555555556e-06,
"loss": 0.2049,
"num_tokens": 7983431.0,
"reward": 0.6602060198783875,
"reward_std": 0.41780614852905273,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.5508807897567749,
"rewards/format_reward_step": 0.71875,
"step": 27
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9501634556800127,
"aux_distill/mean_u": 0.3617846596280876,
"aux_distill/n_active_tok": 118.25,
"calib/answer_extract_rate": 0.8359375,
"calib/auroc": 0.6288407821229051,
"calib/avg_num_step_conf": 3.703125,
"calib/ece": 0.29103288901767604,
"calib/final_conf_rate": 0.79296875,
"calib/format_rate": 0.671875,
"calib/frac_conf_gt_0.9": 0.09359605911330049,
"calib/gap": 0.15580383256654617,
"calib/mean_conf": 0.38630382497826715,
"calib/mu_c": 0.5236875,
"calib/mu_w": 0.3678836674334538,
"calib/nonempty_final_conf_rate": 0.79296875,
"calib/nonempty_reasoning_rate": 0.96484375,
"calib/nonempty_step_conf_rate": 0.90234375,
"calib/pce": 0.2795550565053608,
"calib/std_conf": 0.2957872801567985,
"calib/step_conf_rate": 0.90234375,
"calib/step_q_c": 0.38362744047619046,
"calib/step_q_c_n": 84.0,
"calib/step_q_gap": 0.03731586640211637,
"calib/step_q_w": 0.3463115740740741,
"calib/step_q_w_n": 864.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2702.0,
"completions/max_terminated_length": 2702.0,
"completions/mean_length": 391.390625,
"completions/mean_terminated_length": 394.4724426269531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.011087479069828987,
"learning_rate": 4.777777777777778e-06,
"loss": 0.1907,
"num_tokens": 8214379.0,
"reward": 0.6477793455123901,
"reward_std": 0.4602370262145996,
"rewards/accuracy_reward_step": 0.10546875,
"rewards/final_brier_reward_step": 0.518215000629425,
"rewards/format_reward_step": 0.671875,
"step": 28
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9244299307465553,
"aux_distill/mean_u": 0.3844669224833983,
"aux_distill/n_active_tok": 126.375,
"calib/answer_extract_rate": 0.89453125,
"calib/auroc": 0.540199530516432,
"calib/avg_num_step_conf": 3.97265625,
"calib/ece": 0.37245397873303165,
"calib/final_conf_rate": 0.86328125,
"calib/format_rate": 0.77734375,
"calib/frac_conf_gt_0.9": 0.08144796380090498,
"calib/gap": 0.02993037887323935,
"calib/mean_conf": 0.4086530737556562,
"calib/mu_c": 0.4375,
"calib/mu_w": 0.40756962112676065,
"calib/nonempty_final_conf_rate": 0.86328125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.94140625,
"calib/pce": 0.37245397873303165,
"calib/std_conf": 0.29742238264250437,
"calib/step_conf_rate": 0.94140625,
"calib/step_q_c": 0.40611111111111114,
"calib/step_q_c_n": 18.0,
"calib/step_q_gap": 0.0017035261261261203,
"calib/step_q_w": 0.404407584984985,
"calib/step_q_w_n": 999.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3017.0,
"completions/max_terminated_length": 3017.0,
"completions/mean_length": 363.75390625,
"completions/mean_terminated_length": 365.180419921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.01006327848881483,
"learning_rate": 4.75e-06,
"loss": 0.1764,
"num_tokens": 8438436.0,
"reward": 0.6926558017730713,
"reward_std": 0.38272884488105774,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.5767179727554321,
"rewards/format_reward_step": 0.77734375,
"step": 29
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.963107381016016,
"aux_distill/mean_u": 0.35260435378325244,
"aux_distill/n_active_tok": 113.625,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4605082417582418,
"calib/avg_num_step_conf": 3.55078125,
"calib/ece": 0.3415881856540084,
"calib/final_conf_rate": 0.92578125,
"calib/format_rate": 0.88671875,
"calib/frac_conf_gt_0.9": 0.04219409282700422,
"calib/gap": -0.04613557692307696,
"calib/mean_conf": 0.38668185654008436,
"calib/mu_c": 0.34307692307692306,
"calib/mu_w": 0.3892125,
"calib/nonempty_final_conf_rate": 0.92578125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.953125,
"calib/pce": 0.33670886075949363,
"calib/std_conf": 0.27919238238117394,
"calib/step_conf_rate": 0.953125,
"calib/step_q_c": 0.4132608695652174,
"calib/step_q_c_n": 46.0,
"calib/step_q_gap": 0.0017828417001836683,
"calib/step_q_w": 0.41147802786503374,
"calib/step_q_w_n": 863.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2993.0,
"completions/max_terminated_length": 2993.0,
"completions/mean_length": 326.765625,
"completions/mean_terminated_length": 326.765625,
"completions/min_length": 1.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.032,
"grad_norm": 0.011806800030171871,
"learning_rate": 4.722222222222222e-06,
"loss": 0.2685,
"num_tokens": 8652880.0,
"reward": 0.8086734414100647,
"reward_std": 0.2870190441608429,
"rewards/accuracy_reward_step": 0.0625,
"rewards/final_brier_reward_step": 0.6681281924247742,
"rewards/format_reward_step": 0.88671875,
"step": 30
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9272862039506435,
"aux_distill/mean_u": 0.33398219030968823,
"aux_distill/n_active_tok": 120.875,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5596361185983827,
"calib/avg_num_step_conf": 3.77734375,
"calib/ece": 0.3165919930890386,
"calib/final_conf_rate": 0.91015625,
"calib/format_rate": 0.8984375,
"calib/frac_conf_gt_0.9": 0.07296137339055794,
"calib/gap": 0.046519626643156786,
"calib/mean_conf": 0.40672074845384554,
"calib/mu_c": 0.44904761904761903,
"calib/mu_w": 0.40252799240446224,
"calib/nonempty_final_conf_rate": 0.91015625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.3165919930890386,
"calib/std_conf": 0.2837682946800375,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.48237500000000005,
"calib/step_q_c_n": 80.0,
"calib/step_q_gap": 0.053977691069966305,
"calib/step_q_w": 0.42839730893003375,
"calib/step_q_w_n": 887.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3014.0,
"completions/max_terminated_length": 3014.0,
"completions/mean_length": 300.38671875,
"completions/mean_terminated_length": 302.751953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 29.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.011283627711236477,
"learning_rate": 4.694444444444445e-06,
"loss": 0.238,
"num_tokens": 8859499.0,
"reward": 0.8242871761322021,
"reward_std": 0.3063260316848755,
"rewards/accuracy_reward_step": 0.08203125,
"rewards/final_brier_reward_step": 0.6681056022644043,
"rewards/format_reward_step": 0.8984375,
"step": 31
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.979875098913908,
"aux_distill/mean_u": 0.366037572053497,
"aux_distill/n_active_tok": 107.5,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5059424520433695,
"calib/avg_num_step_conf": 3.359375,
"calib/ece": 0.31686250000000005,
"calib/final_conf_rate": 0.9375,
"calib/format_rate": 0.90234375,
"calib/frac_conf_gt_0.9": 0.058333333333333334,
"calib/gap": 0.006073811509591465,
"calib/mean_conf": 0.3849375,
"calib/mu_c": 0.39045454545454555,
"calib/mu_w": 0.3843807339449541,
"calib/nonempty_final_conf_rate": 0.9375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.3050666666666667,
"calib/std_conf": 0.282678896619026,
"calib/step_conf_rate": 0.97265625,
"calib/step_q_c": 0.4355492537313433,
"calib/step_q_c_n": 67.0,
"calib/step_q_gap": 0.03124660555984271,
"calib/step_q_w": 0.4043026481715006,
"calib/step_q_w_n": 793.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2144.0,
"completions/max_terminated_length": 2144.0,
"completions/mean_length": 267.640625,
"completions/mean_terminated_length": 267.640625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.011885612271726131,
"learning_rate": 4.666666666666667e-06,
"loss": 0.113,
"num_tokens": 9058527.0,
"reward": 0.833459734916687,
"reward_std": 0.2733987271785736,
"rewards/accuracy_reward_step": 0.0859375,
"rewards/final_brier_reward_step": 0.678638219833374,
"rewards/format_reward_step": 0.90234375,
"step": 32
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9799461141228676,
"aux_distill/mean_u": 0.37253693561336365,
"aux_distill/n_active_tok": 98.375,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5056140350877192,
"calib/avg_num_step_conf": 3.07421875,
"calib/ece": 0.2963055922373021,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.921875,
"calib/frac_conf_gt_0.9": 0.03278688524590164,
"calib/gap": 0.020465030552545727,
"calib/mean_conf": 0.3548127684184357,
"calib/mu_c": 0.37368421052631584,
"calib/mu_w": 0.3532191799737701,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.2866247540983607,
"calib/std_conf": 0.25139900802320464,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.5187301587301587,
"calib/step_q_c_n": 63.0,
"calib/step_q_gap": 0.13456536591247914,
"calib/step_q_w": 0.38416479281767957,
"calib/step_q_w_n": 724.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1467.0,
"completions/max_terminated_length": 1467.0,
"completions/mean_length": 244.12890625,
"completions/mean_terminated_length": 244.12890625,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"epoch": 0.0352,
"grad_norm": 0.01415384653955698,
"learning_rate": 4.638888888888889e-06,
"loss": 0.1471,
"num_tokens": 9251704.0,
"reward": 0.867424726486206,
"reward_std": 0.2542864680290222,
"rewards/accuracy_reward_step": 0.078125,
"rewards/final_brier_reward_step": 0.7348494529724121,
"rewards/format_reward_step": 0.921875,
"step": 33
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9852313920855522,
"aux_distill/mean_u": 0.362606382446605,
"aux_distill/n_active_tok": 95.25,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5539965986394558,
"calib/avg_num_step_conf": 2.9765625,
"calib/ece": 0.2839067601360692,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 0.02040816326530612,
"calib/gap": 0.06695019538688857,
"calib/mean_conf": 0.34450267850341615,
"calib/mu_c": 0.4057142857142857,
"calib/mu_w": 0.3387640903273971,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.27134757646259977,
"calib/std_conf": 0.24820097732672813,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.3980327868852459,
"calib/step_q_c_n": 61.0,
"calib/step_q_gap": 0.013356609995088975,
"calib/step_q_w": 0.38467617689015693,
"calib/step_q_w_n": 701.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2447.0,
"completions/max_terminated_length": 2447.0,
"completions/mean_length": 223.28125,
"completions/mean_terminated_length": 223.28125,
"completions/min_length": 10.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.013528089970350266,
"learning_rate": 4.611111111111112e-06,
"loss": 0.1617,
"num_tokens": 9437784.0,
"reward": 0.8848718404769897,
"reward_std": 0.24772733449935913,
"rewards/accuracy_reward_step": 0.08203125,
"rewards/final_brier_reward_step": 0.7541186809539795,
"rewards/format_reward_step": 0.93359375,
"step": 34
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9460540004074574,
"aux_distill/mean_u": 0.39174463892047273,
"aux_distill/n_active_tok": 95.5,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4527986633249791,
"calib/avg_num_step_conf": 2.984375,
"calib/ece": 0.23544979919678713,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.01606425702811245,
"calib/gap": -0.04850814536340847,
"calib/mean_conf": 0.3087028112449799,
"calib/mu_c": 0.2642857142857143,
"calib/mu_w": 0.31279385964912276,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.22990763052208835,
"calib/std_conf": 0.2189346054831001,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.3818421052631578,
"calib/step_q_c_n": 57.0,
"calib/step_q_gap": 0.024514948261743386,
"calib/step_q_w": 0.35732715700141443,
"calib/step_q_w_n": 707.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1673.0,
"completions/max_terminated_length": 1673.0,
"completions/mean_length": 215.12890625,
"completions/mean_terminated_length": 215.12890625,
"completions/min_length": 70.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.012646835297346115,
"learning_rate": 4.583333333333333e-06,
"loss": 0.1052,
"num_tokens": 9625921.0,
"reward": 0.9041399955749512,
"reward_std": 0.18901212513446808,
"rewards/accuracy_reward_step": 0.08203125,
"rewards/final_brier_reward_step": 0.7770299911499023,
"rewards/format_reward_step": 0.94921875,
"step": 35
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9608928207308054,
"aux_distill/mean_u": 0.3588672235356567,
"aux_distill/n_active_tok": 91.25,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4842995169082125,
"calib/avg_num_step_conf": 2.8515625,
"calib/ece": 0.1956403162055336,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011857707509881422,
"calib/gap": -0.013333333333333308,
"calib/mean_conf": 0.29721343873517786,
"calib/mu_c": 0.286304347826087,
"calib/mu_w": 0.2996376811594203,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.15551778656126483,
"calib/std_conf": 0.22247753783059368,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.36039370078740157,
"calib/step_q_c_n": 127.0,
"calib/step_q_gap": 0.017015591334665237,
"calib/step_q_w": 0.34337810945273634,
"calib/step_q_w_n": 603.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 752.0,
"completions/max_terminated_length": 752.0,
"completions/mean_length": 185.0703125,
"completions/mean_terminated_length": 185.7960968017578,
"completions/min_length": 0.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.0384,
"grad_norm": 0.01309342123568058,
"learning_rate": 4.555555555555556e-06,
"loss": 0.1108,
"num_tokens": 9799819.0,
"reward": 0.9677448272705078,
"reward_std": 0.16143733263015747,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.7714272737503052,
"rewards/format_reward_step": 0.984375,
"step": 36
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9541783407330513,
"aux_distill/mean_u": 0.3490026414154334,
"aux_distill/n_active_tok": 93.0,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5971202710333146,
"calib/avg_num_step_conf": 2.9140625,
"calib/ece": 0.19826377952755905,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.007874015748031496,
"calib/gap": 0.08735610766045548,
"calib/mean_conf": 0.2888149606299213,
"calib/mu_c": 0.3682608695652174,
"calib/mu_w": 0.28090476190476193,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.19826377952755905,
"calib/std_conf": 0.21542403070346172,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.37949152542372877,
"calib/step_q_c_n": 59.0,
"calib/step_q_gap": 0.046236794710482754,
"calib/step_q_w": 0.333254730713246,
"calib/step_q_w_n": 687.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 519.0,
"completions/max_terminated_length": 519.0,
"completions/mean_length": 186.54296875,
"completions/mean_terminated_length": 187.27452087402344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 67.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.01288458239287138,
"learning_rate": 4.527777777777778e-06,
"loss": 0.1158,
"num_tokens": 9978478.0,
"reward": 0.9314919710159302,
"reward_std": 0.1761791706085205,
"rewards/accuracy_reward_step": 0.08984375,
"rewards/final_brier_reward_step": 0.8122028112411499,
"rewards/format_reward_step": 0.9609375,
"step": 37
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9978272467851639,
"aux_distill/mean_u": 0.41385125474943524,
"aux_distill/n_active_tok": 93.125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5765827093260721,
"calib/avg_num_step_conf": 2.91796875,
"calib/ece": 0.14998690476190477,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.003968253968253968,
"calib/gap": 0.019804935330156537,
"calib/mean_conf": 0.25316150793650793,
"calib/mu_c": 0.27092307692307693,
"calib/mu_w": 0.2511181415929204,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.14998690476190477,
"calib/std_conf": 0.20493503148430275,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.3125,
"calib/step_q_c_n": 72.0,
"calib/step_q_gap": 0.02171377777777772,
"calib/step_q_w": 0.2907862222222223,
"calib/step_q_w_n": 675.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 542.0,
"completions/max_terminated_length": 542.0,
"completions/mean_length": 180.4921875,
"completions/mean_terminated_length": 181.20001220703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.013265673071146011,
"learning_rate": 4.5e-06,
"loss": 0.0651,
"num_tokens": 10155380.0,
"reward": 0.9324043989181519,
"reward_std": 0.16627129912376404,
"rewards/accuracy_reward_step": 0.10546875,
"rewards/final_brier_reward_step": 0.8062150478363037,
"rewards/format_reward_step": 0.953125,
"step": 38
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9659969937056303,
"aux_distill/mean_u": 0.3698160120751096,
"aux_distill/n_active_tok": 103.5,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4778632478632479,
"calib/avg_num_step_conf": 3.234375,
"calib/ece": 0.1762390438247012,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.015045470085470053,
"calib/mean_conf": 0.23882071713147413,
"calib/mu_c": 0.2523076923076923,
"calib/mu_w": 0.23726222222222224,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.15573705179282873,
"calib/std_conf": 0.1808239985600988,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.3414432989690722,
"calib/step_q_c_n": 97.0,
"calib/step_q_gap": 0.04963755341503667,
"calib/step_q_w": 0.29180574555403554,
"calib/step_q_w_n": 731.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 559.0,
"completions/max_terminated_length": 559.0,
"completions/mean_length": 194.640625,
"completions/mean_terminated_length": 195.4039306640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 55.0,
"epoch": 0.0416,
"grad_norm": 0.011580703780055046,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0894,
"num_tokens": 10335104.0,
"reward": 0.9487248063087463,
"reward_std": 0.14561089873313904,
"rewards/accuracy_reward_step": 0.10546875,
"rewards/final_brier_reward_step": 0.8271371126174927,
"rewards/format_reward_step": 0.96484375,
"step": 39
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9587426632642746,
"aux_distill/mean_u": 0.3633853945422882,
"aux_distill/n_active_tok": 110.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4965065502183406,
"calib/avg_num_step_conf": 3.45703125,
"calib/ece": 0.15555118110236218,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.007874015748031496,
"calib/gap": 0.0032855895196506613,
"calib/mean_conf": 0.22263779527559055,
"calib/mu_c": 0.2256,
"calib/mu_w": 0.22231441048034933,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.13988188976377955,
"calib/std_conf": 0.1837077947369346,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.31063829787234043,
"calib/step_q_c_n": 94.0,
"calib/step_q_gap": 0.024195819996234214,
"calib/step_q_w": 0.2864424778761062,
"calib/step_q_w_n": 791.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 507.0,
"completions/max_terminated_length": 507.0,
"completions/mean_length": 197.40625,
"completions/mean_terminated_length": 198.18040466308594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.012859523296356201,
"learning_rate": 4.444444444444444e-06,
"loss": 0.1088,
"num_tokens": 10516208.0,
"reward": 0.9728861451148987,
"reward_std": 0.09098789095878601,
"rewards/accuracy_reward_step": 0.09765625,
"rewards/final_brier_reward_step": 0.8559284806251526,
"rewards/format_reward_step": 0.9921875,
"step": 40
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9850291721522808,
"aux_distill/mean_u": 0.3810529126961046,
"aux_distill/n_active_tok": 105.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5601572598018095,
"calib/avg_num_step_conf": 3.3046875,
"calib/ece": 0.1174117683764826,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.00392156862745098,
"calib/gap": 0.02264431306585027,
"calib/mean_conf": 0.22149019240783113,
"calib/mu_c": 0.24022725145447588,
"calib/mu_w": 0.2175829383886256,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0831764705882353,
"calib/std_conf": 0.1776340592509435,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3362070422535211,
"calib/step_q_c_n": 142.0,
"calib/step_q_gap": 0.05128942861715746,
"calib/step_q_w": 0.28491761363636364,
"calib/step_q_w_n": 704.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 437.0,
"completions/max_terminated_length": 437.0,
"completions/mean_length": 182.26171875,
"completions/mean_terminated_length": 182.9764862060547,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.01246651541441679,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0824,
"num_tokens": 10693923.0,
"reward": 0.9991874694824219,
"reward_std": 0.10035333037376404,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.8264999985694885,
"rewards/format_reward_step": 0.99609375,
"step": 41
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9697458352893591,
"aux_distill/mean_u": 0.39303880177944706,
"aux_distill/n_active_tok": 110.625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5225159079784629,
"calib/avg_num_step_conf": 3.45703125,
"calib/ece": 0.13940944881889764,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.011210637950726021,
"calib/mean_conf": 0.2240551181102362,
"calib/mu_c": 0.23407407407407405,
"calib/mu_w": 0.22286343612334802,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.12858267716535435,
"calib/std_conf": 0.15931599892407236,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2636585365853659,
"calib/step_q_c_n": 82.0,
"calib/step_q_gap": -0.009936731160586743,
"calib/step_q_w": 0.27359526774595266,
"calib/step_q_w_n": 803.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1751.0,
"completions/max_terminated_length": 1751.0,
"completions/mean_length": 197.6328125,
"completions/mean_terminated_length": 198.40785217285156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 63.0,
"epoch": 0.0448,
"grad_norm": 0.011651011183857918,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0682,
"num_tokens": 10872693.0,
"reward": 0.9721810817718506,
"reward_std": 0.10417618602514267,
"rewards/accuracy_reward_step": 0.10546875,
"rewards/final_brier_reward_step": 0.8545183539390564,
"rewards/format_reward_step": 0.984375,
"step": 42
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9164891820400953,
"aux_distill/mean_u": 0.3497377000059944,
"aux_distill/n_active_tok": 127.25,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4870480302212628,
"calib/avg_num_step_conf": 3.9765625,
"calib/ece": 0.16513095238095243,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.011904761904761904,
"calib/gap": 0.008839449541284405,
"calib/mean_conf": 0.2123531746031746,
"calib/mu_c": 0.22,
"calib/mu_w": 0.2111605504587156,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.12128174603174603,
"calib/std_conf": 0.1834661160647659,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2771200000000001,
"calib/step_q_c_n": 125.0,
"calib/step_q_gap": -0.01632882418812981,
"calib/step_q_w": 0.2934488241881299,
"calib/step_q_w_n": 893.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2723.0,
"completions/max_terminated_length": 2723.0,
"completions/mean_length": 206.765625,
"completions/mean_terminated_length": 207.57647705078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.011534487828612328,
"learning_rate": 4.361111111111112e-06,
"loss": 0.1498,
"num_tokens": 11054657.0,
"reward": 0.9709699153900146,
"reward_std": 0.12181063741445541,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.8286584615707397,
"rewards/format_reward_step": 0.98046875,
"step": 43
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9449839890003204,
"aux_distill/mean_u": 0.37906744113113067,
"aux_distill/n_active_tok": 125.875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5441608312627061,
"calib/avg_num_step_conf": 3.93359375,
"calib/ece": 0.1222142857142857,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004717415857239643,
"calib/mean_conf": 0.18826984126984128,
"calib/mu_c": 0.19263157894736843,
"calib/mu_w": 0.1879141630901288,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1175436507936508,
"calib/std_conf": 0.1649879222466796,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33276923076923076,
"calib/step_q_c_n": 65.0,
"calib/step_q_gap": 0.053753307202351774,
"calib/step_q_w": 0.279015923566879,
"calib/step_q_w_n": 942.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 551.0,
"completions/max_terminated_length": 551.0,
"completions/mean_length": 198.57421875,
"completions/mean_terminated_length": 200.13780212402344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 40.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.012412100099027157,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0443,
"num_tokens": 11235620.0,
"reward": 0.9523601531982422,
"reward_std": 0.13291436433792114,
"rewards/accuracy_reward_step": 0.07421875,
"rewards/final_brier_reward_step": 0.8617515563964844,
"rewards/format_reward_step": 0.96875,
"step": 44
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9304977301508188,
"aux_distill/mean_u": 0.33005481343618503,
"aux_distill/n_active_tok": 125.375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49855861627162074,
"calib/avg_num_step_conf": 3.91796875,
"calib/ece": 0.1155418326693227,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0063657911595131345,
"calib/mean_conf": 0.18422709163346612,
"calib/mu_c": 0.17857142857142858,
"calib/mu_w": 0.1849372197309417,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.09410756972111552,
"calib/std_conf": 0.15502340211174045,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2783673469387756,
"calib/step_q_c_n": 98.0,
"calib/step_q_gap": -0.016820498365091807,
"calib/step_q_w": 0.2951878453038674,
"calib/step_q_w_n": 905.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2175.0,
"completions/max_terminated_length": 2175.0,
"completions/mean_length": 212.3984375,
"completions/mean_terminated_length": 212.3984375,
"completions/min_length": 60.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.048,
"grad_norm": 0.010993365198373795,
"learning_rate": 4.305555555555556e-06,
"loss": 0.1596,
"num_tokens": 11418850.0,
"reward": 0.9715802073478699,
"reward_std": 0.10800088942050934,
"rewards/accuracy_reward_step": 0.109375,
"rewards/final_brier_reward_step": 0.8533166646957397,
"rewards/format_reward_step": 0.98046875,
"step": 45
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9679493922740221,
"aux_distill/mean_u": 0.40249134664638886,
"aux_distill/n_active_tok": 144.25,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.477030529953917,
"calib/avg_num_step_conf": 4.5390625,
"calib/ece": 0.12313725490196079,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.00392156862745098,
"calib/gap": -0.002782258064516152,
"calib/mean_conf": 0.17341176470588235,
"calib/mu_c": 0.17096774193548386,
"calib/mu_w": 0.17375000000000002,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.08749019607843136,
"calib/std_conf": 0.15773807732460254,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.22230769230769235,
"calib/step_q_c_n": 104.0,
"calib/step_q_gap": -0.04604816780572921,
"calib/step_q_w": 0.26835586011342155,
"calib/step_q_w_n": 1058.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 736.0,
"completions/max_terminated_length": 736.0,
"completions/mean_length": 213.00390625,
"completions/mean_terminated_length": 213.8392333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.010834389366209507,
"learning_rate": 4.277777777777778e-06,
"loss": 0.1079,
"num_tokens": 11601955.0,
"reward": 0.9894277453422546,
"reward_std": 0.0719679743051529,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8616679906845093,
"rewards/format_reward_step": 0.99609375,
"step": 46
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9069111328572035,
"aux_distill/mean_u": 0.4040764168674257,
"aux_distill/n_active_tok": 168.125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5417624521072797,
"calib/avg_num_step_conf": 5.25390625,
"calib/ece": 0.11180708661417323,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002361839080459782,
"calib/mean_conf": 0.16756299212598424,
"calib/mu_c": 0.16965517241379313,
"calib/mu_w": 0.16729333333333335,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.08259842519685039,
"calib/std_conf": 0.16273949634242862,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.24847328244274808,
"calib/step_q_c_n": 131.0,
"calib/step_q_gap": -0.0478842134386358,
"calib/step_q_w": 0.2963574958813839,
"calib/step_q_w_n": 1214.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1203.0,
"completions/max_terminated_length": 1203.0,
"completions/mean_length": 240.28515625,
"completions/mean_terminated_length": 241.2274627685547,
"completions/min_length": 0.0,
"completions/min_terminated_length": 63.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.010445504449307919,
"learning_rate": 4.25e-06,
"loss": 0.0842,
"num_tokens": 11793252.0,
"reward": 0.9773474335670471,
"reward_std": 0.10240879654884338,
"rewards/accuracy_reward_step": 0.11328125,
"rewards/final_brier_reward_step": 0.8570386171340942,
"rewards/format_reward_step": 0.984375,
"step": 47
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9124491214752197,
"aux_distill/mean_u": 0.3623996561858781,
"aux_distill/n_active_tok": 172.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4587806985067259,
"calib/avg_num_step_conf": 5.39453125,
"calib/ece": 0.12156250000000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.00390625,
"calib/gap": -0.014392200419597684,
"calib/mean_conf": 0.139609375,
"calib/mu_c": 0.1272972972972973,
"calib/mu_w": 0.14168949771689499,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05832031249999999,
"calib/std_conf": 0.1444380400452366,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30833333333333335,
"calib/step_q_c_n": 144.0,
"calib/step_q_gap": 0.04539073026138507,
"calib/step_q_w": 0.2629426030719483,
"calib/step_q_w_n": 1237.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 734.0,
"completions/max_terminated_length": 734.0,
"completions/mean_length": 249.046875,
"completions/mean_terminated_length": 250.02354431152344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.0512,
"grad_norm": 0.010337688960134983,
"learning_rate": 4.222222222222223e-06,
"loss": 0.1028,
"num_tokens": 11984504.0,
"reward": 0.9982218742370605,
"reward_std": 0.05581098794937134,
"rewards/accuracy_reward_step": 0.14453125,
"rewards/final_brier_reward_step": 0.8519124984741211,
"rewards/format_reward_step": 1.0,
"step": 48
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9039432909339666,
"aux_distill/mean_u": 0.39276995387199315,
"aux_distill/n_active_tok": 187.875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4854655148772796,
"calib/avg_num_step_conf": 5.99609375,
"calib/ece": 0.10531102362204726,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.017046894282188363,
"calib/mean_conf": 0.1284685039370079,
"calib/mu_c": 0.11363636363636365,
"calib/mu_w": 0.130683257918552,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05192913385826772,
"calib/std_conf": 0.1368993717457314,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.27117647058823524,
"calib/step_q_c_n": 187.0,
"calib/step_q_gap": -0.011019375109094176,
"calib/step_q_w": 0.2821958456973294,
"calib/step_q_w_n": 1348.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1228.0,
"completions/max_terminated_length": 1228.0,
"completions/mean_length": 258.82421875,
"completions/mean_terminated_length": 259.8392333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.009132636711001396,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0703,
"num_tokens": 12179107.0,
"reward": 0.9893507957458496,
"reward_std": 0.06767857074737549,
"rewards/accuracy_reward_step": 0.12890625,
"rewards/final_brier_reward_step": 0.8576078414916992,
"rewards/format_reward_step": 0.9921875,
"step": 49
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8850006274878979,
"aux_distill/mean_u": 0.3797831229916548,
"aux_distill/n_active_tok": 185.375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.6007246376811595,
"calib/avg_num_step_conf": 5.79296875,
"calib/ece": 0.06984251968503936,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03944202898550725,
"calib/mean_conf": 0.11511811023622047,
"calib/mu_c": 0.15083333333333335,
"calib/mu_w": 0.1113913043478261,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04523622047244095,
"calib/std_conf": 0.12903253658977257,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33408333333333334,
"calib/step_q_c_n": 120.0,
"calib/step_q_gap": 0.04620365615064809,
"calib/step_q_w": 0.28787967718268526,
"calib/step_q_w_n": 1363.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2113.0,
"completions/max_terminated_length": 2113.0,
"completions/mean_length": 270.01953125,
"completions/mean_terminated_length": 270.01953125,
"completions/min_length": 77.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.01040023472160101,
"learning_rate": 4.166666666666667e-06,
"loss": 0.1518,
"num_tokens": 12377400.0,
"reward": 0.9875878691673279,
"reward_std": 0.07120494544506073,
"rewards/accuracy_reward_step": 0.09375,
"rewards/final_brier_reward_step": 0.8931445479393005,
"rewards/format_reward_step": 0.98828125,
"step": 50
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9002226144075394,
"aux_distill/mean_u": 0.33791306730053283,
"aux_distill/n_active_tok": 180.5,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.47374512670565305,
"calib/avg_num_step_conf": 5.640625,
"calib/ece": 0.10523622047244093,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0018153021442495337,
"calib/mean_conf": 0.09838582677165356,
"calib/mu_c": 0.09684210526315788,
"calib/mu_w": 0.09865740740740742,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.02700787401574803,
"calib/std_conf": 0.10150517206822475,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2531891891891892,
"calib/step_q_c_n": 185.0,
"calib/step_q_gap": -0.03856299508404354,
"calib/step_q_w": 0.29175218427323274,
"calib/step_q_w_n": 1259.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 607.0,
"completions/max_terminated_length": 607.0,
"completions/mean_length": 250.42578125,
"completions/mean_terminated_length": 251.40785217285156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.0544,
"grad_norm": 0.0104011669754982,
"learning_rate": 4.138888888888889e-06,
"loss": 0.1156,
"num_tokens": 12574613.0,
"reward": 0.9927429556846619,
"reward_std": 0.06316731870174408,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.8487671613693237,
"rewards/format_reward_step": 0.98828125,
"step": 51
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9108906034380198,
"aux_distill/mean_u": 0.37710724738107515,
"aux_distill/n_active_tok": 184.0,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4271367521367522,
"calib/avg_num_step_conf": 5.75,
"calib/ece": 0.06745669291338584,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.01048290598290598,
"calib/mean_conf": 0.07915748031496064,
"calib/mu_c": 0.0695,
"calib/mu_w": 0.07998290598290599,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03393700787401575,
"calib/std_conf": 0.08907043352922596,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.1876635514018692,
"calib/step_q_c_n": 107.0,
"calib/step_q_gap": -0.08009469035637257,
"calib/step_q_w": 0.26775824175824176,
"calib/step_q_w_n": 1365.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1690.0,
"completions/max_terminated_length": 1690.0,
"completions/mean_length": 264.8359375,
"completions/mean_terminated_length": 264.8359375,
"completions/min_length": 69.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.00995530467480421,
"learning_rate": 4.111111111111111e-06,
"loss": 0.1265,
"num_tokens": 12774171.0,
"reward": 0.9905729293823242,
"reward_std": 0.04434080421924591,
"rewards/accuracy_reward_step": 0.078125,
"rewards/final_brier_reward_step": 0.9108333587646484,
"rewards/format_reward_step": 0.9921875,
"step": 52
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9225151222199202,
"aux_distill/mean_u": 0.37064661099430596,
"aux_distill/n_active_tok": 182.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4493873811977556,
"calib/avg_num_step_conf": 5.74609375,
"calib/ece": 0.11933070866141732,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.006633459292339397,
"calib/mean_conf": 0.0601968503937008,
"calib/mu_c": 0.05463414634146342,
"calib/mu_w": 0.06126760563380282,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.009055118110236224,
"calib/std_conf": 0.07679839025647278,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.23280612244897958,
"calib/step_q_c_n": 196.0,
"calib/step_q_gap": -0.060803132452981246,
"calib/step_q_w": 0.2936092549019608,
"calib/step_q_w_n": 1275.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 720.0,
"completions/max_terminated_length": 720.0,
"completions/mean_length": 256.7265625,
"completions/mean_terminated_length": 257.73333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.011194716207683086,
"learning_rate": 4.083333333333334e-06,
"loss": 0.1016,
"num_tokens": 12969525.0,
"reward": 0.9962138533592224,
"reward_std": 0.04443935677409172,
"rewards/accuracy_reward_step": 0.16015625,
"rewards/final_brier_reward_step": 0.8400839567184448,
"rewards/format_reward_step": 0.9921875,
"step": 53
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8998712226748466,
"aux_distill/mean_u": 0.32628339549880664,
"aux_distill/n_active_tok": 176.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.46494611457742485,
"calib/avg_num_step_conf": 5.54296875,
"calib/ece": 0.13329296875000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.01152546795235393,
"calib/mean_conf": 0.05358203125000001,
"calib/mu_c": 0.04390243902439025,
"calib/mu_w": 0.05542790697674418,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.013359375,
"calib/std_conf": 0.07227076575887392,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2622346368715084,
"calib/step_q_c_n": 179.0,
"calib/step_q_gap": -0.048313750225265795,
"calib/step_q_w": 0.3105483870967742,
"calib/step_q_w_n": 1240.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 651.0,
"completions/max_terminated_length": 651.0,
"completions/mean_length": 247.37890625,
"completions/mean_terminated_length": 248.34902954101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.0576,
"grad_norm": 0.011143301613628864,
"learning_rate": 4.055555555555556e-06,
"loss": 0.1154,
"num_tokens": 13162894.0,
"reward": 1.0029842853546143,
"reward_std": 0.01903366856276989,
"rewards/accuracy_reward_step": 0.16015625,
"rewards/final_brier_reward_step": 0.8458121418952942,
"rewards/format_reward_step": 1.0,
"step": 54
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8807304184883833,
"aux_distill/mean_u": 0.37027624704653356,
"aux_distill/n_active_tok": 185.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.47440860215053765,
"calib/avg_num_step_conf": 5.83984375,
"calib/ece": 0.09199218749999999,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.005567025089605729,
"calib/mean_conf": 0.0445703125,
"calib/mu_c": 0.03967741935483871,
"calib/mu_w": 0.04524444444444444,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.007734375000000001,
"calib/std_conf": 0.05816707933747701,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2614261744966443,
"calib/step_q_c_n": 149.0,
"calib/step_q_gap": -0.03416520737556966,
"calib/step_q_w": 0.29559138187221395,
"calib/step_q_w_n": 1346.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 632.0,
"completions/max_terminated_length": 632.0,
"completions/mean_length": 252.5703125,
"completions/mean_terminated_length": 253.56080627441406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.012026661075651646,
"learning_rate": 4.027777777777779e-06,
"loss": 0.1282,
"num_tokens": 13359184.0,
"reward": 0.9982151985168457,
"reward_std": 0.024000566452741623,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.879243016242981,
"rewards/format_reward_step": 0.99609375,
"step": 55
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9121517166495323,
"aux_distill/mean_u": 0.37407740327435157,
"aux_distill/n_active_tok": 160.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5019558214450068,
"calib/avg_num_step_conf": 5.0625,
"calib/ece": 0.148300395256917,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.01022434422457432,
"calib/mean_conf": 0.03320158102766799,
"calib/mu_c": 0.024634146341463416,
"calib/mu_w": 0.034858490566037736,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.009723320158102768,
"calib/std_conf": 0.07660552202945613,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.297914691943128,
"calib/step_q_c_n": 211.0,
"calib/step_q_gap": -0.015171022342586271,
"calib/step_q_w": 0.31308571428571424,
"calib/step_q_w_n": 1085.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 559.0,
"completions/max_terminated_length": 559.0,
"completions/mean_length": 234.5703125,
"completions/mean_terminated_length": 235.49020385742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.011675729416310787,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0984,
"num_tokens": 13549882.0,
"reward": 0.9887820482254028,
"reward_std": 0.04803510010242462,
"rewards/accuracy_reward_step": 0.16015625,
"rewards/final_brier_reward_step": 0.8291265368461609,
"rewards/format_reward_step": 0.98828125,
"step": 56
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8941332511603832,
"aux_distill/mean_u": 0.34353156945045377,
"aux_distill/n_active_tok": 161.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.47129928093230844,
"calib/avg_num_step_conf": 5.03515625,
"calib/ece": 0.12662745098039216,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0010562856434416061,
"calib/mean_conf": 0.01847058823529412,
"calib/mu_c": 0.017567567567567572,
"calib/mu_w": 0.018623853211009178,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.021337946444714605,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.24946524064171127,
"calib/step_q_c_n": 187.0,
"calib/step_q_gap": -0.02679610237099289,
"calib/step_q_w": 0.27626134301270416,
"calib/step_q_w_n": 1102.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1091.0,
"completions/max_terminated_length": 1091.0,
"completions/mean_length": 234.6171875,
"completions/mean_terminated_length": 235.53726196289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.0608,
"grad_norm": 0.009379012510180473,
"learning_rate": 3.972222222222223e-06,
"loss": 0.0756,
"num_tokens": 13740544.0,
"reward": 0.9982361793518066,
"reward_std": 0.017334245145320892,
"rewards/accuracy_reward_step": 0.14453125,
"rewards/final_brier_reward_step": 0.8558472394943237,
"rewards/format_reward_step": 0.99609375,
"step": 57
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9004692044109106,
"aux_distill/mean_u": 0.3421467995106455,
"aux_distill/n_active_tok": 145.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5516298153635742,
"calib/avg_num_step_conf": 4.5546875,
"calib/ece": 0.14568627450980393,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.006524960109414179,
"calib/mean_conf": 0.017450980392156864,
"calib/mu_c": 0.022926829268292686,
"calib/mu_w": 0.016401869158878507,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001176470588235294,
"calib/std_conf": 0.029786372542404516,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.302636815920398,
"calib/step_q_c_n": 201.0,
"calib/step_q_gap": -0.01488650014177817,
"calib/step_q_w": 0.31752331606217615,
"calib/step_q_w_n": 965.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1124.0,
"completions/max_terminated_length": 1124.0,
"completions/mean_length": 220.375,
"completions/mean_terminated_length": 221.23922729492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.010609041899442673,
"learning_rate": 3.944444444444445e-06,
"loss": 0.1015,
"num_tokens": 13927088.0,
"reward": 0.9991719722747803,
"reward_std": 0.01902610808610916,
"rewards/accuracy_reward_step": 0.16015625,
"rewards/final_brier_reward_step": 0.8420941829681396,
"rewards/format_reward_step": 0.99609375,
"step": 58
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8911302741616964,
"aux_distill/mean_u": 0.33356761779633115,
"aux_distill/n_active_tok": 145.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4503644939965694,
"calib/avg_num_step_conf": 4.5625,
"calib/ece": 0.1625390625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.002714408233276157,
"calib/mean_conf": 0.0129296875,
"calib/mu_c": 0.010681818181818183,
"calib/mu_w": 0.01339622641509434,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001796875,
"calib/std_conf": 0.0249767653260454,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2742857142857143,
"calib/step_q_c_n": 182.0,
"calib/step_q_gap": -0.03988264271225728,
"calib/step_q_w": 0.3141683569979716,
"calib/step_q_w_n": 986.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 543.0,
"completions/max_terminated_length": 543.0,
"completions/mean_length": 214.80078125,
"completions/mean_terminated_length": 215.6431427001953,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.008717049844563007,
"learning_rate": 3.916666666666667e-06,
"loss": 0.0844,
"num_tokens": 14112133.0,
"reward": 1.001440405845642,
"reward_std": 0.004281938541680574,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.831005871295929,
"rewards/format_reward_step": 1.0,
"step": 59
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8966669980436563,
"aux_distill/mean_u": 0.32837913568838606,
"aux_distill/n_active_tok": 151.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5467383512544803,
"calib/avg_num_step_conf": 4.76953125,
"calib/ece": 0.111015625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00045591397849462263,
"calib/mean_conf": 0.010078125,
"calib/mu_c": 0.009677419354838712,
"calib/mu_w": 0.010133333333333334,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.01484272200387702,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2873717948717949,
"calib/step_q_c_n": 156.0,
"calib/step_q_gap": -0.07487383893102206,
"calib/step_q_w": 0.36224563380281694,
"calib/step_q_w_n": 1065.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 612.0,
"completions/max_terminated_length": 612.0,
"completions/mean_length": 211.99609375,
"completions/mean_terminated_length": 212.8274688720703,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.064,
"grad_norm": 0.008605522103607655,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0757,
"num_tokens": 14299068.0,
"reward": 1.0010108947753906,
"reward_std": 0.0022478175815194845,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8809281587600708,
"rewards/format_reward_step": 1.0,
"step": 60
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9263600539416075,
"aux_distill/mean_u": 0.31786048663733524,
"aux_distill/n_active_tok": 124.625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.47050865800865793,
"calib/avg_num_step_conf": 3.89453125,
"calib/ece": 0.1665748031496063,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.001725108225108224,
"calib/mean_conf": 0.0066535433070866136,
"calib/mu_c": 0.005227272727272728,
"calib/mu_w": 0.006952380952380952,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.010320744306275885,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.29112499999999997,
"calib/step_q_c_n": 160.0,
"calib/step_q_gap": -0.026999253285543645,
"calib/step_q_w": 0.3181242532855436,
"calib/step_q_w_n": 837.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3006.0,
"completions/max_terminated_length": 3006.0,
"completions/mean_length": 211.0234375,
"completions/mean_terminated_length": 211.0234375,
"completions/min_length": 44.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.008169060572981834,
"learning_rate": 3.861111111111112e-06,
"loss": 0.1759,
"num_tokens": 14480962.0,
"reward": 0.9930111169815063,
"reward_std": 0.024283651262521744,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8219597339630127,
"rewards/format_reward_step": 0.9921875,
"step": 61
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9069723356515169,
"aux_distill/mean_u": 0.33958075347316097,
"aux_distill/n_active_tok": 129.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5061648745519713,
"calib/avg_num_step_conf": 4.06640625,
"calib/ece": 0.11655078125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00039756272401433694,
"calib/mean_conf": 0.00454296875,
"calib/mu_c": 0.004193548387096774,
"calib/mu_w": 0.004591111111111111,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006994843900082648,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24023076923076922,
"calib/step_q_c_n": 130.0,
"calib/step_q_gap": -0.04175715612598155,
"calib/step_q_w": 0.28198792535675077,
"calib/step_q_w_n": 911.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 449.0,
"completions/max_terminated_length": 449.0,
"completions/mean_length": 193.44140625,
"completions/mean_terminated_length": 194.20001220703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.006717332173138857,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0766,
"num_tokens": 14661371.0,
"reward": 1.0004730224609375,
"reward_std": 0.001246248371899128,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.879852294921875,
"rewards/format_reward_step": 1.0,
"step": 62
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9175008162856102,
"aux_distill/mean_u": 0.37723775517435953,
"aux_distill/n_active_tok": 136.875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5109437035528861,
"calib/avg_num_step_conf": 4.2890625,
"calib/ece": 0.17965234375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006451185992059454,
"calib/mean_conf": 0.0039414062500000005,
"calib/mu_c": 0.004468085106382979,
"calib/mu_w": 0.0038229665071770336,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.007311196757197889,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3179463687150838,
"calib/step_q_c_n": 179.0,
"calib/step_q_gap": 0.03303777241475736,
"calib/step_q_w": 0.28490859630032644,
"calib/step_q_w_n": 919.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 523.0,
"completions/max_terminated_length": 523.0,
"completions/mean_length": 203.7265625,
"completions/mean_terminated_length": 204.52549743652344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.0672,
"grad_norm": 0.008239316754043102,
"learning_rate": 3.8055555555555556e-06,
"loss": 0.0949,
"num_tokens": 14845973.0,
"reward": 0.989067018032074,
"reward_std": 0.035046808421611786,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.8062590956687927,
"rewards/format_reward_step": 0.98828125,
"step": 63
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.905761431902647,
"aux_distill/mean_u": 0.3367796770974856,
"aux_distill/n_active_tok": 133.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4940740740740741,
"calib/avg_num_step_conf": 4.1796875,
"calib/ece": 0.17380666666666666,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00015857142857142893,
"calib/mean_conf": 0.002663921568627451,
"calib/mu_c": 0.002533333333333333,
"calib/mu_w": 0.002691904761904762,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005241571414739029,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3325,
"calib/step_q_c_n": 168.0,
"calib/step_q_gap": 0.05929600886917963,
"calib/step_q_w": 0.2732039911308204,
"calib/step_q_w_n": 902.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 531.0,
"completions/max_terminated_length": 531.0,
"completions/mean_length": 186.58203125,
"completions/mean_terminated_length": 187.31373596191406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 47.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.006117277778685093,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0806,
"num_tokens": 15021322.0,
"reward": 0.9965218901634216,
"reward_std": 0.011855566874146461,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.8211686611175537,
"rewards/format_reward_step": 0.99609375,
"step": 64
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9172791615128517,
"aux_distill/mean_u": 0.3287449626680117,
"aux_distill/n_active_tok": 129.375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4908621651785714,
"calib/avg_num_step_conf": 4.0703125,
"calib/ece": 0.1229140625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0005982142857142857,
"calib/mean_conf": 0.0020859375,
"calib/mu_c": 0.0015625,
"calib/mu_w": 0.0021607142857142858,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005229912976914028,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.35062162162162164,
"calib/step_q_c_n": 111.0,
"calib/step_q_gap": 0.07429509100937676,
"calib/step_q_w": 0.2763265306122449,
"calib/step_q_w_n": 931.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 476.0,
"completions/max_terminated_length": 476.0,
"completions/mean_length": 185.33984375,
"completions/mean_terminated_length": 186.06668090820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.005622010678052902,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0765,
"num_tokens": 15197601.0,
"reward": 0.992367148399353,
"reward_std": 0.02259797602891922,
"rewards/accuracy_reward_step": 0.125,
"rewards/final_brier_reward_step": 0.867546796798706,
"rewards/format_reward_step": 0.9921875,
"step": 65
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9069628901779652,
"aux_distill/mean_u": 0.3493697144402945,
"aux_distill/n_active_tok": 134.375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5360857432775241,
"calib/avg_num_step_conf": 4.2265625,
"calib/ece": 0.1396078431372549,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00043759512937595097,
"calib/mean_conf": 0.0015686274509803923,
"calib/mu_c": 0.0019444444444444442,
"calib/mu_w": 0.0015068493150684932,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00467473351221509,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32437086092715234,
"calib/step_q_c_n": 151.0,
"calib/step_q_gap": 0.03487354621179256,
"calib/step_q_w": 0.2894973147153598,
"calib/step_q_w_n": 931.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 553.0,
"completions/max_terminated_length": 553.0,
"completions/mean_length": 196.44921875,
"completions/mean_terminated_length": 197.21961975097656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.0704,
"grad_norm": 0.005124520510435104,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0888,
"num_tokens": 15378052.0,
"reward": 0.9963550567626953,
"reward_std": 0.011599044315516949,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8559914231300354,
"rewards/format_reward_step": 0.99609375,
"step": 66
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8992666602134705,
"aux_distill/mean_u": 0.34523840359572344,
"aux_distill/n_active_tok": 141.5,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5186297465132852,
"calib/avg_num_step_conf": 4.4453125,
"calib/ece": 0.18244140625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0004388679629441104,
"calib/mean_conf": 0.0011523437500000002,
"calib/mu_c": 0.0015106382978723402,
"calib/mu_w": 0.0010717703349282298,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.003941495291362903,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30930314136125653,
"calib/step_q_c_n": 191.0,
"calib/step_q_gap": 0.02800430292408651,
"calib/step_q_w": 0.28129883843717,
"calib/step_q_w_n": 947.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 481.0,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 196.73828125,
"completions/mean_terminated_length": 197.5098114013672,
"completions/min_length": 0.0,
"completions/min_terminated_length": 52.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.006514097563922405,
"learning_rate": 3.694444444444445e-06,
"loss": 0.0888,
"num_tokens": 15557233.0,
"reward": 0.9924563765525818,
"reward_std": 0.022890709340572357,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.8091315627098083,
"rewards/format_reward_step": 0.9921875,
"step": 67
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8872856739908457,
"aux_distill/mean_u": 0.3291543344773491,
"aux_distill/n_active_tok": 139.125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5060248604769152,
"calib/avg_num_step_conf": 4.34765625,
"calib/ece": 0.1403450980392157,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006491628614916287,
"calib/mean_conf": 0.0008313725490196078,
"calib/mu_c": 0.001388888888888889,
"calib/mu_w": 0.0007397260273972603,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.003281408724031251,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2880645161290322,
"calib/step_q_c_n": 124.0,
"calib/step_q_gap": -0.001834371636387444,
"calib/step_q_w": 0.28989888776541967,
"calib/step_q_w_n": 989.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2429.0,
"completions/max_terminated_length": 2429.0,
"completions/mean_length": 210.1015625,
"completions/mean_terminated_length": 210.1015625,
"completions/min_length": 60.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.0044059958308935165,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.1246,
"num_tokens": 15738915.0,
"reward": 0.9923771619796753,
"reward_std": 0.022651750594377518,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.851941704750061,
"rewards/format_reward_step": 0.9921875,
"step": 68
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8799595925956964,
"aux_distill/mean_u": 0.34479266047882573,
"aux_distill/n_active_tok": 145.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.48727272727272725,
"calib/avg_num_step_conf": 4.5546875,
"calib/ece": 0.13633607843137255,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00040266233766233757,
"calib/mean_conf": 0.0009188235294117647,
"calib/mu_c": 0.0005714285714285715,
"calib/mu_w": 0.000974090909090909,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0033741866976827045,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32007142857142856,
"calib/step_q_c_n": 140.0,
"calib/step_q_gap": 0.012712754107490898,
"calib/step_q_w": 0.30735867446393766,
"calib/step_q_w_n": 1026.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1206.0,
"completions/max_terminated_length": 1206.0,
"completions/mean_length": 211.64453125,
"completions/mean_terminated_length": 212.47451782226562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 58.0,
"epoch": 0.0736,
"grad_norm": 0.003253097180277109,
"learning_rate": 3.638888888888889e-06,
"loss": 0.0896,
"num_tokens": 15921400.0,
"reward": 0.9961657524108887,
"reward_std": 0.011282737366855145,
"rewards/accuracy_reward_step": 0.13671875,
"rewards/final_brier_reward_step": 0.8595190048217773,
"rewards/format_reward_step": 0.99609375,
"step": 69
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8753404449671507,
"aux_distill/mean_u": 0.33883971306083144,
"aux_distill/n_active_tok": 152.625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4919236417033774,
"calib/avg_num_step_conf": 4.78515625,
"calib/ece": 0.10570472440944882,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00025077500407896894,
"calib/mean_conf": 0.000594488188976378,
"calib/mu_c": 0.00037037037037037035,
"calib/mu_w": 0.0006211453744493393,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.002813936270862344,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.22784955752212388,
"calib/step_q_c_n": 113.0,
"calib/step_q_gap": -0.08005421945629337,
"calib/step_q_w": 0.30790377697841725,
"calib/step_q_w_n": 1112.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 787.0,
"completions/max_terminated_length": 787.0,
"completions/mean_length": 208.73828125,
"completions/mean_terminated_length": 209.55686950683594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.004173743538558483,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0841,
"num_tokens": 16105637.0,
"reward": 0.9883161783218384,
"reward_std": 0.03326448053121567,
"rewards/accuracy_reward_step": 0.10546875,
"rewards/final_brier_reward_step": 0.8828824162483215,
"rewards/format_reward_step": 0.98828125,
"step": 70
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8868246395140886,
"aux_distill/mean_u": 0.3468969045243898,
"aux_distill/n_active_tok": 146.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4878048780487805,
"calib/avg_num_step_conf": 4.58203125,
"calib/ece": 0.19591764705882353,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002,
"calib/mean_conf": 0.0001607843137254902,
"calib/mu_c": 0.0,
"calib/mu_w": 0.0002,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.001243662906123547,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3097014925373134,
"calib/step_q_c_n": 201.0,
"calib/step_q_gap": 0.023662397887107667,
"calib/step_q_w": 0.2860390946502057,
"calib/step_q_w_n": 972.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 592.0,
"completions/max_terminated_length": 592.0,
"completions/mean_length": 208.421875,
"completions/mean_terminated_length": 209.23922729492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 75.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.002800512593239546,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0817,
"num_tokens": 16287209.0,
"reward": 0.9960929751396179,
"reward_std": 0.011049911379814148,
"rewards/accuracy_reward_step": 0.1953125,
"rewards/final_brier_reward_step": 0.8007797002792358,
"rewards/format_reward_step": 0.99609375,
"step": 71
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8944362998008728,
"aux_distill/mean_u": 0.37095525634503335,
"aux_distill/n_active_tok": 160.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.528703180877094,
"calib/avg_num_step_conf": 5.078125,
"calib/ece": 0.09025196850393702,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006271409749670619,
"calib/mean_conf": 0.00029921259842519685,
"calib/mu_c": 0.0008695652173913044,
"calib/mu_w": 0.00024242424242424242,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0018774038056023267,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3302631578947368,
"calib/step_q_c_n": 114.0,
"calib/step_q_gap": 0.05938626076151593,
"calib/step_q_w": 0.2708768971332209,
"calib/step_q_w_n": 1186.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 555.0,
"completions/max_terminated_length": 555.0,
"completions/mean_length": 207.21484375,
"completions/mean_terminated_length": 208.0274658203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.0768,
"grad_norm": 0.0037375171668827534,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0848,
"num_tokens": 16468472.0,
"reward": 0.9922637939453125,
"reward_std": 0.022244982421398163,
"rewards/accuracy_reward_step": 0.08984375,
"rewards/final_brier_reward_step": 0.9024963974952698,
"rewards/format_reward_step": 0.9921875,
"step": 72
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8917406033724546,
"aux_distill/mean_u": 0.2963726889406619,
"aux_distill/n_active_tok": 146.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49830397912589686,
"calib/avg_num_step_conf": 4.6015625,
"calib/ece": 0.13739763779527558,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0001298108284409654,
"calib/mean_conf": 0.00039763779527559055,
"calib/mu_c": 0.00028571428571428574,
"calib/mu_w": 0.00041552511415525115,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.002916717078236597,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.36020689655172416,
"calib/step_q_c_n": 145.0,
"calib/step_q_gap": 0.045115705845044596,
"calib/step_q_w": 0.31509119070667957,
"calib/step_q_w_n": 1033.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 562.0,
"completions/max_terminated_length": 562.0,
"completions/mean_length": 203.85546875,
"completions/mean_terminated_length": 204.6549072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.0038799333851784468,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.0907,
"num_tokens": 16651499.0,
"reward": 0.9922223091125488,
"reward_std": 0.022208530455827713,
"rewards/accuracy_reward_step": 0.13671875,
"rewards/final_brier_reward_step": 0.8555382490158081,
"rewards/format_reward_step": 0.9921875,
"step": 73
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.893372992053628,
"aux_distill/mean_u": 0.33364173770817035,
"aux_distill/n_active_tok": 148.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4790697674418605,
"calib/avg_num_step_conf": 4.703125,
"calib/ece": 0.15624666666666667,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0007306976744186046,
"calib/mean_conf": 0.0006160784313725491,
"calib/mu_c": 0.0,
"calib/mu_w": 0.0007306976744186046,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.005473724787808759,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2705263157894737,
"calib/step_q_c_n": 171.0,
"calib/step_q_gap": -0.02696138992204611,
"calib/step_q_w": 0.29748770571151983,
"calib/step_q_w_n": 1033.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 514.0,
"completions/max_terminated_length": 514.0,
"completions/mean_length": 198.390625,
"completions/mean_terminated_length": 199.16864013671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.004464035853743553,
"learning_rate": 3.5e-06,
"loss": 0.1062,
"num_tokens": 16830023.0,
"reward": 0.992172360420227,
"reward_std": 0.022139202803373337,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.8359072804450989,
"rewards/format_reward_step": 0.9921875,
"step": 74
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8780819457024336,
"aux_distill/mean_u": 0.308437020329091,
"aux_distill/n_active_tok": 152.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49765258215962443,
"calib/avg_num_step_conf": 4.76953125,
"calib/ece": 0.16466666666666666,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -4.694835680751174e-05,
"calib/mean_conf": 3.9215686274509805e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 4.694835680751174e-05,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0006249951941376166,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3568253968253968,
"calib/step_q_c_n": 189.0,
"calib/step_q_gap": 0.05972268364710226,
"calib/step_q_w": 0.29710271317829456,
"calib/step_q_w_n": 1032.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 442.0,
"completions/max_terminated_length": 442.0,
"completions/mean_length": 204.1484375,
"completions/mean_terminated_length": 204.94903564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.08,
"grad_norm": 0.003264680504798889,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.0864,
"num_tokens": 17010845.0,
"reward": 0.9960935711860657,
"reward_std": 0.01104909647256136,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.8320308923721313,
"rewards/format_reward_step": 0.99609375,
"step": 75
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8988117761909962,
"aux_distill/mean_u": 0.3584846229869654,
"aux_distill/n_active_tok": 158.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49528301886792453,
"calib/avg_num_step_conf": 4.953125,
"calib/ece": 0.16854901960784316,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.433962264150943e-05,
"calib/mean_conf": 7.843137254901961e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 9.433962264150943e-05,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0008821350493491759,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3034035897435898,
"calib/step_q_c_n": 195.0,
"calib/step_q_gap": 0.03379501565225712,
"calib/step_q_w": 0.26960857409133265,
"calib/step_q_w_n": 1073.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 530.0,
"completions/max_terminated_length": 530.0,
"completions/mean_length": 208.640625,
"completions/mean_terminated_length": 209.45883178710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.003597772680222988,
"learning_rate": 3.444444444444445e-06,
"loss": 0.09,
"num_tokens": 17191121.0,
"reward": 0.9941402673721313,
"reward_std": 0.016573920845985413,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8242179751396179,
"rewards/format_reward_step": 0.9921875,
"step": 76
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8938879240304232,
"aux_distill/mean_u": 0.3483319028167608,
"aux_distill/n_active_tok": 160.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49333333333333335,
"calib/avg_num_step_conf": 5.01171875,
"calib/ece": 0.12101171875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.333333333333334e-05,
"calib/mean_conf": 8.203125e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 9.333333333333334e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0008822851715989779,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.35564285714285715,
"calib/step_q_c_n": 140.0,
"calib/step_q_gap": 0.04764460692413447,
"calib/step_q_w": 0.3079982502187227,
"calib/step_q_w_n": 1143.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 432.0,
"completions/max_terminated_length": 432.0,
"completions/mean_length": 203.703125,
"completions/mean_terminated_length": 204.50196838378906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.0016254527727141976,
"learning_rate": 3.416666666666667e-06,
"loss": 0.0897,
"num_tokens": 17371741.0,
"reward": 0.9999996423721313,
"reward_std": 1.1108706985396566e-06,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8789054751396179,
"rewards/format_reward_step": 1.0,
"step": 77
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8695187773555517,
"aux_distill/mean_u": 0.3649508815295921,
"aux_distill/n_active_tok": 184.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5129910714285715,
"calib/avg_num_step_conf": 5.77734375,
"calib/ece": 0.2186171875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0003557142857142857,
"calib/mean_conf": 0.0001328125,
"calib/mu_c": 0.0004107142857142857,
"calib/mu_w": 5.4999999999999995e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0011948528527997704,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32073717948717945,
"calib/step_q_c_n": 312.0,
"calib/step_q_gap": 0.033710615648276265,
"calib/step_q_w": 0.2870265638389032,
"calib/step_q_w_n": 1167.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 679.0,
"completions/max_terminated_length": 679.0,
"completions/mean_length": 235.140625,
"completions/mean_terminated_length": 236.06275939941406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 75.0,
"epoch": 0.0832,
"grad_norm": 0.003223991021513939,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0852,
"num_tokens": 17563769.0,
"reward": 1.000089168548584,
"reward_std": 0.0002531877835281193,
"rewards/accuracy_reward_step": 0.21875,
"rewards/final_brier_reward_step": 0.7814282178878784,
"rewards/format_reward_step": 1.0,
"step": 78
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8652449604123831,
"aux_distill/mean_u": 0.3407855392361494,
"aux_distill/n_active_tok": 176.875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4996230073244291,
"calib/avg_num_step_conf": 5.54296875,
"calib/ece": 0.1723729411764706,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 6.186988367083154e-05,
"calib/mean_conf": 0.000176078431372549,
"calib/mu_c": 0.00022727272727272727,
"calib/mu_w": 0.00016540284360189574,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0012663117331894332,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.33054794520547953,
"calib/step_q_c_n": 219.0,
"calib/step_q_gap": 0.027218778538812882,
"calib/step_q_w": 0.30332916666666665,
"calib/step_q_w_n": 1200.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 629.0,
"completions/max_terminated_length": 629.0,
"completions/mean_length": 228.3046875,
"completions/mean_terminated_length": 229.20001220703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.0034459615126252174,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.0825,
"num_tokens": 17752399.0,
"reward": 0.9922257661819458,
"reward_std": 0.022208131849765778,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8203890323638916,
"rewards/format_reward_step": 0.9921875,
"step": 79
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9012061804533005,
"aux_distill/mean_u": 0.3593384027984835,
"aux_distill/n_active_tok": 172.875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49230769230769234,
"calib/avg_num_step_conf": 5.74609375,
"calib/ece": 0.22915573122529645,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00012102564102564102,
"calib/mean_conf": 9.328063241106719e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00012102564102564102,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0009022557963437884,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24938356164383566,
"calib/step_q_c_n": 292.0,
"calib/step_q_gap": -0.06631151893292433,
"calib/step_q_w": 0.31569508057676,
"calib/step_q_w_n": 1179.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 554.0,
"completions/max_terminated_length": 554.0,
"completions/mean_length": 227.3203125,
"completions/mean_terminated_length": 228.21177673339844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.002913886681199074,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0709,
"num_tokens": 17936561.0,
"reward": 0.9882808327674866,
"reward_std": 0.033146779984235764,
"rewards/accuracy_reward_step": 0.2265625,
"rewards/final_brier_reward_step": 0.7617179155349731,
"rewards/format_reward_step": 0.98828125,
"step": 80
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8805477563291788,
"aux_distill/mean_u": 0.3763560604832592,
"aux_distill/n_active_tok": 201.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4953051643192488,
"calib/avg_num_step_conf": 6.3203125,
"calib/ece": 0.167890625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.389671361502347e-05,
"calib/mean_conf": 7.8125e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 9.389671361502347e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0008804240366863005,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3075,
"calib/step_q_c_n": 260.0,
"calib/step_q_gap": 0.007315169366715779,
"calib/step_q_w": 0.3001848306332842,
"calib/step_q_w_n": 1358.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 822.0,
"completions/max_terminated_length": 822.0,
"completions/mean_length": 253.32421875,
"completions/mean_terminated_length": 254.31765747070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 63.0,
"epoch": 0.0864,
"grad_norm": 0.0014355859020724893,
"learning_rate": 3.3055555555555558e-06,
"loss": 0.0871,
"num_tokens": 18131468.0,
"reward": 0.9999996423721313,
"reward_std": 1.1056023367927992e-06,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.8320304751396179,
"rewards/format_reward_step": 1.0,
"step": 81
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8585575744509697,
"aux_distill/mean_u": 0.3321043982053774,
"aux_distill/n_active_tok": 180.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5007708911501696,
"calib/avg_num_step_conf": 5.671875,
"calib/ece": 0.18479409448818898,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002748586699558023,
"calib/mean_conf": 0.00024527559055118113,
"calib/mu_c": 2.1276595744680852e-05,
"calib/mu_w": 0.00029613526570048315,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0026527428463929965,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3399958333333333,
"calib/step_q_c_n": 240.0,
"calib/step_q_gap": 0.04995127887788775,
"calib/step_q_w": 0.2900445544554455,
"calib/step_q_w_n": 1212.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 508.0,
"completions/max_terminated_length": 508.0,
"completions/mean_length": 239.60546875,
"completions/mean_terminated_length": 240.54510498046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.003587129293009639,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0863,
"num_tokens": 18322167.0,
"reward": 0.9921879172325134,
"reward_std": 0.022108623757958412,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.8085945248603821,
"rewards/format_reward_step": 0.9921875,
"step": 82
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8511873986572027,
"aux_distill/mean_u": 0.33659491248510026,
"aux_distill/n_active_tok": 194.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49765258215962443,
"calib/avg_num_step_conf": 6.359375,
"calib/ece": 0.16469411764705882,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -1.4084507042253522e-05,
"calib/mean_conf": 1.1764705882352942e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 1.4084507042253522e-05,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.000187498558241285,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3059545454545455,
"calib/step_q_c_n": 220.0,
"calib/step_q_gap": 0.007154829545454544,
"calib/step_q_w": 0.29879971590909093,
"calib/step_q_w_n": 1408.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 654.0,
"completions/max_terminated_length": 654.0,
"completions/mean_length": 251.76171875,
"completions/mean_terminated_length": 252.74903869628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 58.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.0016355804400518537,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0747,
"num_tokens": 18517690.0,
"reward": 0.99609375,
"reward_std": 0.011048593558371067,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.83203125,
"rewards/format_reward_step": 0.99609375,
"step": 83
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9107771255075932,
"aux_distill/mean_u": 0.38344631357969455,
"aux_distill/n_active_tok": 192.25,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5032362459546925,
"calib/avg_num_step_conf": 6.0078125,
"calib/ece": 0.1888740157480315,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00013066343042071198,
"calib/mean_conf": 0.00010236220472440946,
"calib/mu_c": 0.00020833333333333335,
"calib/mu_w": 7.766990291262136e-05,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0009377023827267995,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2435294117647059,
"calib/step_q_c_n": 323.0,
"calib/step_q_gap": -0.08156408617768093,
"calib/step_q_w": 0.32509349794238684,
"calib/step_q_w_n": 1215.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1439.0,
"completions/max_terminated_length": 1439.0,
"completions/mean_length": 246.78515625,
"completions/mean_terminated_length": 246.78515625,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.0896,
"grad_norm": 0.003886830760166049,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.1125,
"num_tokens": 18710595.0,
"reward": 0.9922261238098145,
"reward_std": 0.02220771461725235,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.8047647476196289,
"rewards/format_reward_step": 0.9921875,
"step": 84
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8618598934262991,
"aux_distill/mean_u": 0.33070176505424564,
"aux_distill/n_active_tok": 199.25,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5092694164107064,
"calib/avg_num_step_conf": 6.2265625,
"calib/ece": 0.16854901960784316,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00018538832821412903,
"calib/mean_conf": 7.843137254901961e-05,
"calib/mu_c": 0.00023255813953488373,
"calib/mu_w": 4.7169811320754715e-05,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.000882135049349176,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2995575221238938,
"calib/step_q_c_n": 226.0,
"calib/step_q_gap": -0.04049028489365003,
"calib/step_q_w": 0.34004780701754383,
"calib/step_q_w_n": 1368.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 626.0,
"completions/max_terminated_length": 626.0,
"completions/mean_length": 251.81640625,
"completions/mean_terminated_length": 252.80393981933594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.002688215347006917,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0684,
"num_tokens": 18906692.0,
"reward": 0.9961324334144592,
"reward_std": 0.011159027926623821,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.8282023668289185,
"rewards/format_reward_step": 0.99609375,
"step": 85
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8568615298718214,
"aux_distill/mean_u": 0.35021126297831856,
"aux_distill/n_active_tok": 209.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49980776624375234,
"calib/avg_num_step_conf": 6.5546875,
"calib/ece": 0.19983921568627452,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0001764705882352941,
"calib/mean_conf": 0.00016078431372549016,
"calib/mu_c": 1.9607843137254903e-05,
"calib/mu_w": 0.000196078431372549,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.001243662906123547,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3664554545454546,
"calib/step_q_c_n": 330.0,
"calib/step_q_gap": 0.06424432694901538,
"calib/step_q_w": 0.3022111275964392,
"calib/step_q_w_n": 1348.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 664.0,
"completions/max_terminated_length": 664.0,
"completions/mean_length": 257.46484375,
"completions/mean_terminated_length": 258.4745178222656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.0028597498312592506,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.074,
"num_tokens": 19101923.0,
"reward": 0.9960969090461731,
"reward_std": 0.01106179878115654,
"rewards/accuracy_reward_step": 0.19921875,
"rewards/final_brier_reward_step": 0.7968812584877014,
"rewards/format_reward_step": 0.99609375,
"step": 86
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.883620098233223,
"aux_distill/mean_u": 0.3155454990718053,
"aux_distill/n_active_tok": 179.375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5023886639676113,
"calib/avg_num_step_conf": 5.6328125,
"calib/ece": 0.25470588235294117,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -5.6680161943319844e-05,
"calib/mean_conf": 0.00019607843137254904,
"calib/mu_c": 0.00015384615384615385,
"calib/mu_w": 0.0002105263157894737,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0020676747178767167,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.31411209439528026,
"calib/step_q_c_n": 339.0,
"calib/step_q_gap": -0.0005639708812383293,
"calib/step_q_w": 0.3146760652765186,
"calib/step_q_w_n": 1103.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 582.0,
"completions/max_terminated_length": 582.0,
"completions/mean_length": 238.671875,
"completions/mean_terminated_length": 239.6078643798828,
"completions/min_length": 0.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.0928,
"grad_norm": 0.0026609438937157393,
"learning_rate": 3.138888888888889e-06,
"loss": 0.0891,
"num_tokens": 19292327.0,
"reward": 0.9961307048797607,
"reward_std": 0.011164000257849693,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/final_brier_reward_step": 0.7422612905502319,
"rewards/format_reward_step": 0.99609375,
"step": 87
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8428022786974907,
"aux_distill/mean_u": 0.33032704923695255,
"aux_distill/n_active_tok": 211.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49774774774774777,
"calib/avg_num_step_conf": 6.7109375,
"calib/ece": 0.132796875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -1.801801801801802e-05,
"calib/mean_conf": 1.5625e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 1.801801801801802e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0002495112409792393,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.38029126213592235,
"calib/step_q_c_n": 206.0,
"calib/step_q_gap": 0.04931903991370007,
"calib/step_q_w": 0.3309722222222223,
"calib/step_q_w_n": 1512.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 924.0,
"completions/max_terminated_length": 924.0,
"completions/mean_length": 270.84765625,
"completions/mean_terminated_length": 271.9098205566406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.0018712286837399006,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0842,
"num_tokens": 19495320.0,
"reward": 1.0,
"reward_std": 8.843431942295865e-08,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.8671874403953552,
"rewards/format_reward_step": 1.0,
"step": 88
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8876792825758457,
"aux_distill/mean_u": 0.3874043072057041,
"aux_distill/n_active_tok": 203.0,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49770642201834864,
"calib/avg_num_step_conf": 6.3671875,
"calib/ece": 0.1483984375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -4.587155963302752e-05,
"calib/mean_conf": 3.90625e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 4.587155963302752e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.000623778102448098,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30855855855855857,
"calib/step_q_c_n": 222.0,
"calib/step_q_gap": -0.03795564598689599,
"calib/step_q_w": 0.34651420454545456,
"calib/step_q_w_n": 1408.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 622.0,
"completions/max_terminated_length": 622.0,
"completions/mean_length": 261.14453125,
"completions/mean_terminated_length": 262.16864013671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.002203390235081315,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.0853,
"num_tokens": 19694869.0,
"reward": 0.9960935711860657,
"reward_std": 0.01104909647256136,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.8476558923721313,
"rewards/format_reward_step": 0.99609375,
"step": 89
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8904574010521173,
"aux_distill/mean_u": 0.38580885479020766,
"aux_distill/n_active_tok": 204.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5131578947368421,
"calib/avg_num_step_conf": 6.42578125,
"calib/ece": 0.1483984375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0002631578947368421,
"calib/mean_conf": 3.90625e-05,
"calib/mu_c": 0.0002631578947368421,
"calib/mu_w": 0.0,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.000623778102448098,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.339378947368421,
"calib/step_q_c_n": 209.0,
"calib/step_q_gap": -0.00023860137809711413,
"calib/step_q_w": 0.3396175487465181,
"calib/step_q_w_n": 1436.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 945.0,
"completions/max_terminated_length": 945.0,
"completions/mean_length": 265.22265625,
"completions/mean_terminated_length": 266.26275634765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.096,
"grad_norm": 0.0022498685866594315,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0823,
"num_tokens": 19889894.0,
"reward": 1.0000388622283936,
"reward_std": 0.00010993177420459688,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.8516402244567871,
"rewards/format_reward_step": 1.0,
"step": 90
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8649949803948402,
"aux_distill/mean_u": 0.33981641652048383,
"aux_distill/n_active_tok": 213.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5069330712959931,
"calib/avg_num_step_conf": 6.69921875,
"calib/ece": 0.1678515625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00013866142591986025,
"calib/mean_conf": 0.0001171875,
"calib/mu_c": 0.00023255813953488373,
"calib/mu_w": 9.389671361502347e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0010761701026528055,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3283916083916084,
"calib/step_q_c_n": 286.0,
"calib/step_q_gap": -0.01630048398067996,
"calib/step_q_w": 0.3446920923722884,
"calib/step_q_w_n": 1429.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 625.0,
"completions/max_terminated_length": 625.0,
"completions/mean_length": 267.734375,
"completions/mean_terminated_length": 268.7843322753906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.0024775159545242786,
"learning_rate": 3.0277777777777776e-06,
"loss": 0.0816,
"num_tokens": 20089954.0,
"reward": 1.000038504600525,
"reward_std": 0.00011103737051598728,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.832108199596405,
"rewards/format_reward_step": 1.0,
"step": 91
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8489798828959465,
"aux_distill/mean_u": 0.3153927456116345,
"aux_distill/n_active_tok": 210.75,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.49324324324324326,
"calib/avg_num_step_conf": 6.58984375,
"calib/ece": 0.1224110671936759,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00013513513513513514,
"calib/mean_conf": 0.00011857707509881423,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00013513513513513514,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0010824556472434112,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.297,
"calib/step_q_c_n": 190.0,
"calib/step_q_gap": -0.021366065464261896,
"calib/step_q_w": 0.3183660654642619,
"calib/step_q_w_n": 1497.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 765.0,
"completions/max_terminated_length": 765.0,
"completions/mean_length": 269.25390625,
"completions/mean_terminated_length": 270.309814453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.002843436785042286,
"learning_rate": 3e-06,
"loss": 0.0741,
"num_tokens": 20289411.0,
"reward": 0.9882806539535522,
"reward_std": 0.033146657049655914,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8671863079071045,
"rewards/format_reward_step": 0.98828125,
"step": 92
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8637429755181074,
"aux_distill/mean_u": 0.3244587877849719,
"aux_distill/n_active_tok": 224.5,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4953271028037383,
"calib/avg_num_step_conf": 7.0625,
"calib/ece": 0.1574015748031496,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.345794392523364e-05,
"calib/mean_conf": 7.874015748031496e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 9.345794392523364e-05,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0008838560756158915,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3286440677966102,
"calib/step_q_c_n": 236.0,
"calib/step_q_gap": -9.639021865692765e-05,
"calib/step_q_w": 0.32874045801526713,
"calib/step_q_w_n": 1572.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 968.0,
"completions/max_terminated_length": 968.0,
"completions/mean_length": 285.59765625,
"completions/mean_terminated_length": 286.7176513671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.0992,
"grad_norm": 0.0027552025858312845,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0923,
"num_tokens": 20492108.0,
"reward": 0.9921871423721313,
"reward_std": 0.02209819294512272,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.8359367251396179,
"rewards/format_reward_step": 0.9921875,
"step": 93
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.862243564799428,
"aux_distill/mean_u": 0.30363283298666166,
"aux_distill/n_active_tok": 213.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4951923076923077,
"calib/avg_num_step_conf": 6.7734375,
"calib/ece": 0.18742578125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.134615384615385e-05,
"calib/mean_conf": 7.421875e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 9.134615384615385e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0008375695954059205,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2911636363636364,
"calib/step_q_c_n": 275.0,
"calib/step_q_gap": -0.06487872141566448,
"calib/step_q_w": 0.3560423577793009,
"calib/step_q_w_n": 1459.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1027.0,
"completions/max_terminated_length": 1027.0,
"completions/mean_length": 270.23828125,
"completions/mean_terminated_length": 271.2980651855469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.0026547429151833057,
"learning_rate": 2.944444444444445e-06,
"loss": 0.086,
"num_tokens": 20693777.0,
"reward": 0.9999996423721313,
"reward_std": 1.0006114052885096e-06,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.8124992847442627,
"rewards/format_reward_step": 1.0,
"step": 94
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.871882077306509,
"aux_distill/mean_u": 0.3612003757729328,
"aux_distill/n_active_tok": 210.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5216162287994679,
"calib/avg_num_step_conf": 6.58984375,
"calib/ece": 0.24203125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00043232457598935814,
"calib/mean_conf": 0.00015625,
"calib/mu_c": 0.0004838709677419355,
"calib/mu_w": 5.154639175257732e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0012401959270615269,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.38400000000000006,
"calib/step_q_c_n": 310.0,
"calib/step_q_gap": 0.05170951343500374,
"calib/step_q_w": 0.3322904865649963,
"calib/step_q_w_n": 1377.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 858.0,
"completions/max_terminated_length": 858.0,
"completions/mean_length": 271.01171875,
"completions/mean_terminated_length": 272.07452392578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.0028460733592510223,
"learning_rate": 2.916666666666667e-06,
"loss": 0.0904,
"num_tokens": 20893092.0,
"reward": 1.0001163482666016,
"reward_std": 0.00033034812076948583,
"rewards/accuracy_reward_step": 0.2421875,
"rewards/final_brier_reward_step": 0.7580453157424927,
"rewards/format_reward_step": 1.0,
"step": 95
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8325482532382011,
"aux_distill/mean_u": 0.35870209660360397,
"aux_distill/n_active_tok": 222.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5041836515769148,
"calib/avg_num_step_conf": 6.95703125,
"calib/ece": 0.3083984375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 8.367303153829653e-05,
"calib/mean_conf": 0.0001953125,
"calib/mu_c": 0.00025316455696202533,
"calib/mu_w": 0.0001694915254237288,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0013838273112436214,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3241626506024096,
"calib/step_q_c_n": 498.0,
"calib/step_q_gap": -0.007832283146616104,
"calib/step_q_w": 0.3319949337490257,
"calib/step_q_w_n": 1283.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 749.0,
"completions/max_terminated_length": 749.0,
"completions/mean_length": 270.953125,
"completions/mean_terminated_length": 272.0157165527344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.1024,
"grad_norm": 0.00353445066139102,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0788,
"num_tokens": 21092080.0,
"reward": 1.0000771284103394,
"reward_std": 0.00022152194287627935,
"rewards/accuracy_reward_step": 0.30859375,
"rewards/final_brier_reward_step": 0.6915605068206787,
"rewards/format_reward_step": 1.0,
"step": 96
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8440025225281715,
"aux_distill/mean_u": 0.35295863647655806,
"aux_distill/n_active_tok": 232.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49776785714285715,
"calib/avg_num_step_conf": 7.3046875,
"calib/ece": 0.1249609375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -4.464285714285714e-05,
"calib/mean_conf": 3.90625e-05,
"calib/mu_c": 0.0,
"calib/mu_w": 4.464285714285714e-05,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.000623778102448098,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3241626794258373,
"calib/step_q_c_n": 209.0,
"calib/step_q_gap": -0.011048097214740704,
"calib/step_q_w": 0.335210776640578,
"calib/step_q_w_n": 1661.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 802.0,
"completions/max_terminated_length": 802.0,
"completions/mean_length": 280.48046875,
"completions/mean_terminated_length": 281.5804138183594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.0012264709221199155,
"learning_rate": 2.861111111111111e-06,
"loss": 0.0844,
"num_tokens": 21292763.0,
"reward": 0.9999998211860657,
"reward_std": 5.528011683963996e-07,
"rewards/accuracy_reward_step": 0.125,
"rewards/final_brier_reward_step": 0.8749996423721313,
"rewards/format_reward_step": 1.0,
"step": 97
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8470690567046404,
"aux_distill/mean_u": 0.3851828100309297,
"aux_distill/n_active_tok": 223.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49780997304582203,
"calib/avg_num_step_conf": 6.97265625,
"calib/ece": 0.16510236220472443,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -1.6621743036837383e-05,
"calib/mean_conf": 0.0002519685039370079,
"calib/mu_c": 0.0002380952380952381,
"calib/mu_w": 0.0002547169811320755,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0015367851910457256,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2570394265232975,
"calib/step_q_c_n": 279.0,
"calib/step_q_gap": -0.05668029459224033,
"calib/step_q_w": 0.3137197211155378,
"calib/step_q_w_n": 1506.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1559.0,
"completions/max_terminated_length": 1559.0,
"completions/mean_length": 283.00390625,
"completions/mean_terminated_length": 283.00390625,
"completions/min_length": 77.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.0035589830949902534,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.1186,
"num_tokens": 21495204.0,
"reward": 0.9922254085540771,
"reward_std": 0.02220987156033516,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.8282006978988647,
"rewards/format_reward_step": 0.9921875,
"step": 98
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8080371990799904,
"aux_distill/mean_u": 0.3410837156640096,
"aux_distill/n_active_tok": 241.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4928571428571429,
"calib/avg_num_step_conf": 7.6328125,
"calib/ece": 0.17953125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00019047619047619048,
"calib/mean_conf": 0.00015625,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00019047619047619048,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0015229366163764003,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28961538461538466,
"calib/step_q_c_n": 286.0,
"calib/step_q_gap": -0.0284649511160302,
"calib/step_q_w": 0.31808033573141486,
"calib/step_q_w_n": 1668.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 875.0,
"completions/max_terminated_length": 875.0,
"completions/mean_length": 286.265625,
"completions/mean_terminated_length": 287.38824462890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.1056,
"grad_norm": 0.0018437359249219298,
"learning_rate": 2.805555555555556e-06,
"loss": 0.0813,
"num_tokens": 21698096.0,
"reward": 0.9999988079071045,
"reward_std": 3.3156775316456333e-06,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.820310115814209,
"rewards/format_reward_step": 1.0,
"step": 99
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8550351019948721,
"aux_distill/mean_u": 0.38581492120683264,
"aux_distill/n_active_tok": 234.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5162135604323617,
"calib/avg_num_step_conf": 7.46484375,
"calib/ece": 0.1677734375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00032427120864723225,
"calib/mean_conf": 0.0001953125,
"calib/mu_c": 0.00046511627906976747,
"calib/mu_w": 0.00014084507042253522,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0013838273112436216,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3404095563139932,
"calib/step_q_c_n": 293.0,
"calib/step_q_gap": 0.015230322692237952,
"calib/step_q_w": 0.32517923362175527,
"calib/step_q_w_n": 1618.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 886.0,
"completions/max_terminated_length": 886.0,
"completions/mean_length": 282.94921875,
"completions/mean_terminated_length": 284.058837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.0025342495646327734,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.092,
"num_tokens": 21901747.0,
"reward": 1.000077247619629,
"reward_std": 0.00022152194287627935,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.8321855068206787,
"rewards/format_reward_step": 1.0,
"step": 100
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8608838450163603,
"aux_distill/mean_u": 0.33259501798987035,
"aux_distill/n_active_tok": 235.25,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5026641705069124,
"calib/avg_num_step_conf": 7.41015625,
"calib/ece": 0.12125490196078431,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 1.0080645161290316e-05,
"calib/mean_conf": 0.00031372549019607844,
"calib/mu_c": 0.0003225806451612903,
"calib/mu_w": 0.0003125,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0019552864097753587,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.39691919191919184,
"calib/step_q_c_n": 198.0,
"calib/step_q_gap": 0.061659980618426646,
"calib/step_q_w": 0.3352592113007652,
"calib/step_q_w_n": 1699.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 832.0,
"completions/max_terminated_length": 832.0,
"completions/mean_length": 292.34765625,
"completions/mean_terminated_length": 293.494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.0023431070148944855,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0809,
"num_tokens": 22107388.0,
"reward": 0.9961308240890503,
"reward_std": 0.01106975693255663,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8750742077827454,
"rewards/format_reward_step": 0.99609375,
"step": 101
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8173305280506611,
"aux_distill/mean_u": 0.3504695026670667,
"aux_distill/n_active_tok": 244.625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49953297216514103,
"calib/avg_num_step_conf": 7.64453125,
"calib/ece": 0.20764705882352943,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.340556697179174e-06,
"calib/mean_conf": 0.00019607843137254904,
"calib/mu_c": 0.00018867924528301886,
"calib/mu_w": 0.00019801980198019803,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.001386483884679505,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33429508196721314,
"calib/step_q_c_n": 305.0,
"calib/step_q_gap": 0.04493951295994919,
"calib/step_q_w": 0.28935556900726395,
"calib/step_q_w_n": 1652.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2730.0,
"completions/max_terminated_length": 2730.0,
"completions/mean_length": 290.765625,
"completions/mean_terminated_length": 290.765625,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.1088,
"grad_norm": 0.0022615944035351276,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.1134,
"num_tokens": 22312328.0,
"reward": 0.9961318373680115,
"reward_std": 0.011160054244101048,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/final_brier_reward_step": 0.789138674736023,
"rewards/format_reward_step": 0.99609375,
"step": 102
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8737247381359339,
"aux_distill/mean_u": 0.36661455320470204,
"aux_distill/n_active_tok": 225.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5039115646258504,
"calib/avg_num_step_conf": 7.1328125,
"calib/ece": 0.23409765625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 7.312925170068022e-05,
"calib/mean_conf": 0.00027734375,
"calib/mu_c": 0.0003333333333333333,
"calib/mu_w": 0.0002602040816326531,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0018554276311233315,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.41829850746268654,
"calib/step_q_c_n": 335.0,
"calib/step_q_gap": 0.0982855631300239,
"calib/step_q_w": 0.32001294433266264,
"calib/step_q_w_n": 1491.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 943.0,
"completions/max_terminated_length": 943.0,
"completions/mean_length": 295.15625,
"completions/mean_terminated_length": 296.3137512207031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.0028516261372715235,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.0924,
"num_tokens": 22516248.0,
"reward": 0.9961703419685364,
"reward_std": 0.011269855313003063,
"rewards/accuracy_reward_step": 0.234375,
"rewards/final_brier_reward_step": 0.761871874332428,
"rewards/format_reward_step": 0.99609375,
"step": 103
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8354058209806681,
"aux_distill/mean_u": 0.3212144089934268,
"aux_distill/n_active_tok": 226.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4834905660377358,
"calib/avg_num_step_conf": 7.1171875,
"calib/ece": 0.1716015625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00033018867924528304,
"calib/mean_conf": 0.0002734375,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00033018867924528304,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0016308301363396958,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3244781144781145,
"calib/step_q_c_n": 297.0,
"calib/step_q_gap": -0.013587459292377313,
"calib/step_q_w": 0.3380655737704918,
"calib/step_q_w_n": 1525.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1290.0,
"completions/max_terminated_length": 1290.0,
"completions/mean_length": 286.76953125,
"completions/mean_terminated_length": 287.8941345214844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 72.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.0023796723689883947,
"learning_rate": 2.666666666666667e-06,
"loss": 0.1023,
"num_tokens": 22720149.0,
"reward": 0.9960923790931702,
"reward_std": 0.01105241384357214,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8242160081863403,
"rewards/format_reward_step": 0.99609375,
"step": 104
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.868424192070961,
"aux_distill/mean_u": 0.32407661465809195,
"aux_distill/n_active_tok": 222.125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.48873873873873874,
"calib/avg_num_step_conf": 7.3125,
"calib/ece": 0.12233201581027668,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00022522522522522523,
"calib/mean_conf": 0.0001976284584980237,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00022522522522522523,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0013918432301706727,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2927948453608248,
"calib/step_q_c_n": 194.0,
"calib/step_q_gap": -0.03498703783345414,
"calib/step_q_w": 0.3277818831942789,
"calib/step_q_w_n": 1678.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2293.0,
"completions/max_terminated_length": 2293.0,
"completions/mean_length": 286.6796875,
"completions/mean_terminated_length": 287.8039245605469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.112,
"grad_norm": 0.0025983734522014856,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.1346,
"num_tokens": 22923107.0,
"reward": 0.9882802963256836,
"reward_std": 0.03314775973558426,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8671855926513672,
"rewards/format_reward_step": 0.98828125,
"step": 105
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8548662774264812,
"aux_distill/mean_u": 0.3412352244089931,
"aux_distill/n_active_tok": 195.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.48,
"calib/avg_num_step_conf": 6.10546875,
"calib/ece": 0.11729411764705883,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00039999999999999996,
"calib/mean_conf": 0.0003529411764705882,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00039999999999999996,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0018452220166303669,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3631176470588235,
"calib/step_q_c_n": 170.0,
"calib/step_q_gap": 0.06649725940627504,
"calib/step_q_w": 0.29662038765254845,
"calib/step_q_w_n": 1393.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2404.0,
"completions/max_terminated_length": 2404.0,
"completions/mean_length": 262.546875,
"completions/mean_terminated_length": 262.546875,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.0020745107904076576,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.1196,
"num_tokens": 23118711.0,
"reward": 0.9960920214653015,
"reward_std": 0.011053518392145634,
"rewards/accuracy_reward_step": 0.1171875,
"rewards/final_brier_reward_step": 0.8789027333259583,
"rewards/format_reward_step": 0.99609375,
"step": 106
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8563811108469963,
"aux_distill/mean_u": 0.358172699943934,
"aux_distill/n_active_tok": 218.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5149049539170507,
"calib/avg_num_step_conf": 6.8359375,
"calib/ece": 0.12086274509803921,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0002980990783410138,
"calib/mean_conf": 0.0007058823529411764,
"calib/mu_c": 0.000967741935483871,
"calib/mu_w": 0.0006696428571428571,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0025613577714208515,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2366346153846154,
"calib/step_q_c_n": 208.0,
"calib/step_q_gap": -0.10943451561408757,
"calib/step_q_w": 0.34606913099870296,
"calib/step_q_w_n": 1542.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 980.0,
"completions/max_terminated_length": 980.0,
"completions/mean_length": 277.1171875,
"completions/mean_terminated_length": 277.1171875,
"completions/min_length": 53.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.0028200845699757338,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.1,
"num_tokens": 23318077.0,
"reward": 0.9962074756622314,
"reward_std": 0.01130930706858635,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8752273321151733,
"rewards/format_reward_step": 0.99609375,
"step": 107
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8544626701623201,
"aux_distill/mean_u": 0.3307901570017504,
"aux_distill/n_active_tok": 203.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4869109947643979,
"calib/avg_num_step_conf": 6.42578125,
"calib/ece": 0.2537109375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002617801047120419,
"calib/mean_conf": 0.0001953125,
"calib/mu_c": 0.0,
"calib/mu_w": 0.0002617801047120419,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0013838273112436214,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29852248520710056,
"calib/step_q_c_n": 338.0,
"calib/step_q_gap": 0.005442990180321683,
"calib/step_q_w": 0.2930794950267789,
"calib/step_q_w_n": 1307.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1036.0,
"completions/max_terminated_length": 1036.0,
"completions/mean_length": 258.140625,
"completions/mean_terminated_length": 259.1529541015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.1152,
"grad_norm": 0.0021364479325711727,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0865,
"num_tokens": 23511201.0,
"reward": 0.9999990463256836,
"reward_std": 2.764005785138579e-06,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/final_brier_reward_step": 0.7460918426513672,
"rewards/format_reward_step": 1.0,
"step": 108
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8432301990687847,
"aux_distill/mean_u": 0.3266780022049978,
"aux_distill/n_active_tok": 221.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4978860703159768,
"calib/avg_num_step_conf": 7.0,
"calib/ece": 0.1637890625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -4.227859368046284e-05,
"calib/mean_conf": 0.00027343749999999997,
"calib/mu_c": 0.0002380952380952381,
"calib/mu_w": 0.00028037383177570094,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0016308301363396956,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.34582068965517243,
"calib/step_q_c_n": 232.0,
"calib/step_q_gap": -0.007160079575596778,
"calib/step_q_w": 0.3529807692307692,
"calib/step_q_w_n": 1560.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1145.0,
"completions/max_terminated_length": 1145.0,
"completions/mean_length": 277.08984375,
"completions/mean_terminated_length": 278.1764831542969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.002132768277078867,
"learning_rate": 2.5277777777777778e-06,
"loss": 0.0849,
"num_tokens": 23710544.0,
"reward": 1.000037670135498,
"reward_std": 0.00011277615703875199,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.8360128998756409,
"rewards/format_reward_step": 1.0,
"step": 109
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8624531216919422,
"aux_distill/mean_u": 0.3428116708899324,
"aux_distill/n_active_tok": 202.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5102040816326531,
"calib/avg_num_step_conf": 6.3515625,
"calib/ece": 0.1913671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00020408163265306123,
"calib/mean_conf": 3.90625e-05,
"calib/mu_c": 0.00020408163265306123,
"calib/mu_w": 0.0,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.000623778102448098,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3759057971014493,
"calib/step_q_c_n": 276.0,
"calib/step_q_gap": 0.062253648953301155,
"calib/step_q_w": 0.3136521481481481,
"calib/step_q_w_n": 1350.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 688.0,
"completions/max_terminated_length": 688.0,
"completions/mean_length": 261.16015625,
"completions/mean_terminated_length": 262.184326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.003013479057699442,
"learning_rate": 2.5e-06,
"loss": 0.0906,
"num_tokens": 23906129.0,
"reward": 0.9961326122283936,
"reward_std": 0.0111584747210145,
"rewards/accuracy_reward_step": 0.19140625,
"rewards/final_brier_reward_step": 0.8047652244567871,
"rewards/format_reward_step": 0.99609375,
"step": 110
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8441929463297129,
"aux_distill/mean_u": 0.3441107825296833,
"aux_distill/n_active_tok": 198.625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5240320427236315,
"calib/avg_num_step_conf": 6.21875,
"calib/ece": 0.16375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0004806408544726302,
"calib/mean_conf": 0.0003125,
"calib/mu_c": 0.0007142857142857143,
"calib/mu_w": 0.00023364485981308412,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.001739926363384382,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3002446351931331,
"calib/step_q_c_n": 233.0,
"calib/step_q_gap": -0.02867736627853723,
"calib/step_q_w": 0.32892200147167033,
"calib/step_q_w_n": 1359.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 701.0,
"completions/max_terminated_length": 701.0,
"completions/mean_length": 261.015625,
"completions/mean_terminated_length": 262.03924560546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.1184,
"grad_norm": 0.0034395901020616293,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.0851,
"num_tokens": 24104165.0,
"reward": 0.9923031330108643,
"reward_std": 0.022335954010486603,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.8283562660217285,
"rewards/format_reward_step": 0.9921875,
"step": 111
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8398895040154457,
"aux_distill/mean_u": 0.35261902089203984,
"aux_distill/n_active_tok": 203.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5112972555935192,
"calib/avg_num_step_conf": 6.390625,
"calib/ece": 0.1689763779527559,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00018075608949630773,
"calib/mean_conf": 0.00031496062992125983,
"calib/mu_c": 0.00046511627906976747,
"calib/mu_w": 0.00028436018957345974,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.001959032331436965,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3352569169960474,
"calib/step_q_c_n": 253.0,
"calib/step_q_gap": 0.046753663199952,
"calib/step_q_w": 0.2885032537960954,
"calib/step_q_w_n": 1383.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 858.0,
"completions/max_terminated_length": 858.0,
"completions/mean_length": 260.8125,
"completions/mean_terminated_length": 261.8352966308594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 59.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.003930776380002499,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0745,
"num_tokens": 24302661.0,
"reward": 0.994216799736023,
"reward_std": 0.01679299585521221,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8243710994720459,
"rewards/format_reward_step": 0.9921875,
"step": 112
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8919267151504755,
"aux_distill/mean_u": 0.3533143225373007,
"aux_distill/n_active_tok": 191.5,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.493953488372093,
"calib/avg_num_step_conf": 5.984375,
"calib/ece": 0.15653333333333333,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.418604651162795e-05,
"calib/mean_conf": 0.0003294117647058824,
"calib/mu_c": 0.00025,
"calib/mu_w": 0.00034418604651162795,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.001758262747507709,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3289644549763033,
"calib/step_q_c_n": 211.0,
"calib/step_q_gap": -0.02367498484201619,
"calib/step_q_w": 0.35263943981831947,
"calib/step_q_w_n": 1321.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2043.0,
"completions/max_terminated_length": 2043.0,
"completions/mean_length": 248.12890625,
"completions/mean_terminated_length": 248.12890625,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.002600407926365733,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.125,
"num_tokens": 24495190.0,
"reward": 0.996131181716919,
"reward_std": 0.011162434704601765,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.8399186730384827,
"rewards/format_reward_step": 0.99609375,
"step": 113
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.861085245385766,
"aux_distill/mean_u": 0.32562836972033066,
"aux_distill/n_active_tok": 182.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5092592592592593,
"calib/avg_num_step_conf": 5.69140625,
"calib/ece": 0.14062745098039214,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 7.610350076103431e-06,
"calib/mean_conf": 0.0005490196078431374,
"calib/mu_c": 0.0005555555555555556,
"calib/mu_w": 0.0005479452054794521,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.003267869279372495,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.33482926829268295,
"calib/step_q_c_n": 205.0,
"calib/step_q_gap": -0.013357712537988009,
"calib/step_q_w": 0.34818698083067096,
"calib/step_q_w_n": 1252.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2350.0,
"completions/max_terminated_length": 2350.0,
"completions/mean_length": 249.95703125,
"completions/mean_terminated_length": 249.95703125,
"completions/min_length": 82.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.1216,
"grad_norm": 0.0028793588280677795,
"learning_rate": 2.388888888888889e-06,
"loss": 0.14,
"num_tokens": 24688011.0,
"reward": 0.9961664080619812,
"reward_std": 0.011282390914857388,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8556140661239624,
"rewards/format_reward_step": 0.99609375,
"step": 114
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8542553428560495,
"aux_distill/mean_u": 0.3127482086279962,
"aux_distill/n_active_tok": 194.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5236378205128205,
"calib/avg_num_step_conf": 6.12890625,
"calib/ece": 0.1866796875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010416666666666669,
"calib/mean_conf": 0.0008203125000000001,
"calib/mu_c": 0.0016666666666666668,
"calib/mu_w": 0.000625,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0036054073836868632,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.328165543071161,
"calib/step_q_c_n": 267.0,
"calib/step_q_gap": 0.008849874868396057,
"calib/step_q_w": 0.31931566820276497,
"calib/step_q_w_n": 1302.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 644.0,
"completions/max_terminated_length": 644.0,
"completions/mean_length": 248.51953125,
"completions/mean_terminated_length": 249.49412536621094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 39.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.00394708476960659,
"learning_rate": 2.361111111111111e-06,
"loss": 0.071,
"num_tokens": 24880704.0,
"reward": 0.9963996410369873,
"reward_std": 0.011927313171327114,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.8092054128646851,
"rewards/format_reward_step": 0.99609375,
"step": 115
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8351747281849384,
"aux_distill/mean_u": 0.3194099311432214,
"aux_distill/n_active_tok": 229.125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5019817073170731,
"calib/avg_num_step_conf": 7.16015625,
"calib/ece": 0.1891304347826087,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 3.963414634146345e-05,
"calib/mean_conf": 0.0005928853754940711,
"calib/mu_c": 0.000625,
"calib/mu_w": 0.0005853658536585366,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0023616394065280088,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28326771653543303,
"calib/step_q_c_n": 254.0,
"calib/step_q_gap": -0.04504482304658097,
"calib/step_q_w": 0.328312539582014,
"calib/step_q_w_n": 1579.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2653.0,
"completions/max_terminated_length": 2653.0,
"completions/mean_length": 287.15625,
"completions/mean_terminated_length": 287.15625,
"completions/min_length": 73.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.0038886968977749348,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.126,
"num_tokens": 25082544.0,
"reward": 0.9825363159179688,
"reward_std": 0.04223396256566048,
"rewards/accuracy_reward_step": 0.19140625,
"rewards/final_brier_reward_step": 0.7931976318359375,
"rewards/format_reward_step": 0.98046875,
"step": 116
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8338127043098211,
"aux_distill/mean_u": 0.28180083732390127,
"aux_distill/n_active_tok": 213.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5284824548078383,
"calib/avg_num_step_conf": 6.73046875,
"calib/ece": 0.11273046875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0009343764241227404,
"calib/mean_conf": 0.0005507812500000001,
"calib/mu_c": 0.0013793103448275863,
"calib/mu_w": 0.00044493392070484586,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0024393741542962284,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.34746341463414643,
"calib/step_q_c_n": 205.0,
"calib/step_q_gap": 0.037617367203316376,
"calib/step_q_w": 0.30984604743083005,
"calib/step_q_w_n": 1518.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 650.0,
"completions/max_terminated_length": 650.0,
"completions/mean_length": 268.328125,
"completions/mean_terminated_length": 269.3804016113281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.1248,
"grad_norm": 0.0032077496871352196,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0736,
"num_tokens": 25281644.0,
"reward": 1.0001530647277832,
"reward_std": 0.0004437759052962065,
"rewards/accuracy_reward_step": 0.11328125,
"rewards/final_brier_reward_step": 0.887024998664856,
"rewards/format_reward_step": 1.0,
"step": 117
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8412914387881756,
"aux_distill/mean_u": 0.28512050657689647,
"aux_distill/n_active_tok": 187.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4725,
"calib/avg_num_step_conf": 5.921875,
"calib/ece": 0.2183203125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00055,
"calib/mean_conf": 0.0004296875,
"calib/mu_c": 0.0,
"calib/mu_w": 0.00055,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.002027866773815221,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.27316613418530356,
"calib/step_q_c_n": 313.0,
"calib/step_q_gap": -0.02819088992109714,
"calib/step_q_w": 0.3013570241064007,
"calib/step_q_w_n": 1203.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 631.0,
"completions/max_terminated_length": 631.0,
"completions/mean_length": 249.84375,
"completions/mean_terminated_length": 250.82354736328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.0025783516466617584,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0849,
"num_tokens": 25473420.0,
"reward": 0.9999978542327881,
"reward_std": 5.317016984918155e-06,
"rewards/accuracy_reward_step": 0.21875,
"rewards/final_brier_reward_step": 0.7812457084655762,
"rewards/format_reward_step": 1.0,
"step": 118
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9055172242224216,
"aux_distill/mean_u": 0.35845761656511016,
"aux_distill/n_active_tok": 194.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5418560606060605,
"calib/avg_num_step_conf": 6.1328125,
"calib/ece": 0.1398828125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0007525252525252525,
"calib/mean_conf": 0.0007421875,
"calib/mu_c": 0.001388888888888889,
"calib/mu_w": 0.0006363636363636364,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0030355860578879573,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.300391061452514,
"calib/step_q_c_n": 179.0,
"calib/step_q_gap": 0.007846848655964744,
"calib/step_q_w": 0.29254421279654924,
"calib/step_q_w_n": 1391.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 877.0,
"completions/max_terminated_length": 877.0,
"completions/mean_length": 263.62890625,
"completions/mean_terminated_length": 264.6627502441406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 59.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.004290780518203974,
"learning_rate": 2.25e-06,
"loss": 0.1046,
"num_tokens": 25669781.0,
"reward": 1.0001903772354126,
"reward_std": 0.000559728650841862,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8597558736801147,
"rewards/format_reward_step": 1.0,
"step": 119
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.873245045542717,
"aux_distill/mean_u": 0.33521150432676294,
"aux_distill/n_active_tok": 182.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5024099441907661,
"calib/avg_num_step_conf": 5.69140625,
"calib/ece": 0.14054901960784313,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -8.371385083713861e-05,
"calib/mean_conf": 0.000627450980392157,
"calib/mu_c": 0.0005555555555555556,
"calib/mu_w": 0.0006392694063926942,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.003003010152012023,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3134090909090909,
"calib/step_q_c_n": 176.0,
"calib/step_q_gap": -0.009096763891845827,
"calib/step_q_w": 0.32250585480093674,
"calib/step_q_w_n": 1281.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2675.0,
"completions/max_terminated_length": 2675.0,
"completions/mean_length": 248.625,
"completions/mean_terminated_length": 248.625,
"completions/min_length": 84.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.128,
"grad_norm": 0.0037139912601560354,
"learning_rate": 2.222222222222222e-06,
"loss": 0.1296,
"num_tokens": 25863925.0,
"reward": 0.9961671829223633,
"reward_std": 0.0112801818177104,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8556156158447266,
"rewards/format_reward_step": 0.99609375,
"step": 120
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8498683087527752,
"aux_distill/mean_u": 0.3374592906505771,
"aux_distill/n_active_tok": 214.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4713458584426326,
"calib/avg_num_step_conf": 6.73828125,
"calib/ece": 0.1516015625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0005730828311473473,
"calib/mean_conf": 0.0007421875,
"calib/mu_c": 0.0002564102564102564,
"calib/mu_w": 0.0008294930875576037,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0026212654796574403,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3189316239316239,
"calib/step_q_c_n": 234.0,
"calib/step_q_gap": 0.015775285903454894,
"calib/step_q_w": 0.303156338028169,
"calib/step_q_w_n": 1491.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 715.0,
"completions/max_terminated_length": 715.0,
"completions/mean_length": 270.0703125,
"completions/mean_terminated_length": 271.1294250488281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.0026788460090756416,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.0897,
"num_tokens": 26061927.0,
"reward": 1.000035285949707,
"reward_std": 0.00011826898844446987,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.8477269411087036,
"rewards/format_reward_step": 1.0,
"step": 121
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8673976100981236,
"aux_distill/mean_u": 0.3998442732842434,
"aux_distill/n_active_tok": 191.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5096590909090909,
"calib/avg_num_step_conf": 6.01171875,
"calib/ece": 0.139921875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0001515151515151514,
"calib/mean_conf": 0.000703125,
"calib/mu_c": 0.0008333333333333333,
"calib/mu_w": 0.0006818181818181819,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0027052015145594974,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29958333333333337,
"calib/step_q_c_n": 192.0,
"calib/step_q_gap": 0.029442279138827077,
"calib/step_q_w": 0.2701410541945063,
"calib/step_q_w_n": 1347.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 707.0,
"completions/max_terminated_length": 707.0,
"completions/mean_length": 250.0,
"completions/mean_terminated_length": 250.98040771484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.004579862579703331,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0859,
"num_tokens": 26257079.0,
"reward": 1.0001132488250732,
"reward_std": 0.00033672014251351357,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8596014976501465,
"rewards/format_reward_step": 1.0,
"step": 122
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8707848694175482,
"aux_distill/mean_u": 0.35850991461610887,
"aux_distill/n_active_tok": 193.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5176339285714285,
"calib/avg_num_step_conf": 6.0859375,
"calib/ece": 0.2180859375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0005214285714285714,
"calib/mean_conf": 0.0006640624999999999,
"calib/mu_c": 0.0010714285714285715,
"calib/mu_w": 0.00055,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.002642138527044665,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28770392749244716,
"calib/step_q_c_n": 331.0,
"calib/step_q_gap": -0.0071442387667215446,
"calib/step_q_w": 0.2948481662591687,
"calib/step_q_w_n": 1227.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 834.0,
"completions/max_terminated_length": 834.0,
"completions/mean_length": 258.71484375,
"completions/mean_terminated_length": 259.72943115234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.1312,
"grad_norm": 0.00448179617524147,
"learning_rate": 2.138888888888889e-06,
"loss": 0.0866,
"num_tokens": 26452406.0,
"reward": 1.0002305507659912,
"reward_std": 0.0006628651753999293,
"rewards/accuracy_reward_step": 0.21875,
"rewards/final_brier_reward_step": 0.7817113399505615,
"rewards/format_reward_step": 1.0,
"step": 123
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8901912048459053,
"aux_distill/mean_u": 0.2978366075901536,
"aux_distill/n_active_tok": 171.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4705912438625205,
"calib/avg_num_step_conf": 5.37890625,
"calib/ece": 0.18329411764705883,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0007283142389525369,
"calib/mean_conf": 0.001019607843137255,
"calib/mu_c": 0.000425531914893617,
"calib/mu_w": 0.001153846153846154,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.003506242030942292,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28164502164502164,
"calib/step_q_c_n": 231.0,
"calib/step_q_gap": -0.03583316334625236,
"calib/step_q_w": 0.317478184991274,
"calib/step_q_w_n": 1146.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 548.0,
"completions/max_terminated_length": 548.0,
"completions/mean_length": 234.97265625,
"completions/mean_terminated_length": 235.89413452148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.0037744231522083282,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.0938,
"num_tokens": 26643183.0,
"reward": 0.9981184005737305,
"reward_std": 0.005757684353739023,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.8126429915428162,
"rewards/format_reward_step": 0.99609375,
"step": 124
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8706583678722382,
"aux_distill/mean_u": 0.3025617848825858,
"aux_distill/n_active_tok": 168.875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4990793780687397,
"calib/avg_num_step_conf": 5.32421875,
"calib/ece": 0.18341176470588236,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -6.239770867430441e-05,
"calib/mean_conf": 0.0009019607843137254,
"calib/mu_c": 0.000851063829787234,
"calib/mu_w": 0.0009134615384615384,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.002998397624096514,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33955284552845527,
"calib/step_q_c_n": 246.0,
"calib/step_q_gap": 0.035713991455044336,
"calib/step_q_w": 0.30383885407341094,
"calib/step_q_w_n": 1117.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 762.0,
"completions/max_terminated_length": 762.0,
"completions/mean_length": 235.75,
"completions/mean_terminated_length": 236.67453002929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 72.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.005619540344923735,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.1079,
"num_tokens": 26832151.0,
"reward": 0.9845265746116638,
"reward_std": 0.04456701502203941,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.8010843396186829,
"rewards/format_reward_step": 0.984375,
"step": 125
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8616320062428713,
"aux_distill/mean_u": 0.3353064855944266,
"aux_distill/n_active_tok": 185.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49109924343569206,
"calib/avg_num_step_conf": 5.83203125,
"calib/ece": 0.1634375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00017801513128615936,
"calib/mean_conf": 0.0006250000000000001,
"calib/mu_c": 0.0004761904761904762,
"calib/mu_w": 0.0006542056074766356,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0024206145913796356,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3123214285714285,
"calib/step_q_c_n": 224.0,
"calib/step_q_gap": -0.019685663627153016,
"calib/step_q_w": 0.33200709219858154,
"calib/step_q_w_n": 1269.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 610.0,
"completions/max_terminated_length": 610.0,
"completions/mean_length": 248.95703125,
"completions/mean_terminated_length": 249.933349609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.1344,
"grad_norm": 0.0028145809192210436,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0784,
"num_tokens": 27025156.0,
"reward": 1.0000749826431274,
"reward_std": 0.00022589563741348684,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.8360875248908997,
"rewards/format_reward_step": 1.0,
"step": 126
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8615944497287273,
"aux_distill/mean_u": 0.3128397979115316,
"aux_distill/n_active_tok": 171.5,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4927832830676432,
"calib/avg_num_step_conf": 5.359375,
"calib/ece": 0.17175686274509805,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00035308056872037914,
"calib/mean_conf": 0.000792156862745098,
"calib/mu_c": 0.0005,
"calib/mu_w": 0.0008530805687203792,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0028311878707898927,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.30305699481865284,
"calib/step_q_c_n": 193.0,
"calib/step_q_gap": -0.027531385164383637,
"calib/step_q_w": 0.3305883799830365,
"calib/step_q_w_n": 1179.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2177.0,
"completions/max_terminated_length": 2177.0,
"completions/mean_length": 234.6328125,
"completions/mean_terminated_length": 234.6328125,
"completions/min_length": 54.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.004149171058088541,
"learning_rate": 2.027777777777778e-06,
"loss": 0.1157,
"num_tokens": 27212702.0,
"reward": 0.9922691583633423,
"reward_std": 0.022253649309277534,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8204757571220398,
"rewards/format_reward_step": 0.9921875,
"step": 127
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.896057328209281,
"aux_distill/mean_u": 0.3779112387709184,
"aux_distill/n_active_tok": 167.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4755381604696673,
"calib/avg_num_step_conf": 5.25390625,
"calib/ece": 0.13710905511811025,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00046451402478799736,
"calib/mean_conf": 0.000686220472440945,
"calib/mu_c": 0.00028571428571428574,
"calib/mu_w": 0.0007502283105022831,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.002508946018674027,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2608588957055215,
"calib/step_q_c_n": 163.0,
"calib/step_q_gap": -0.025587804802092717,
"calib/step_q_w": 0.2864467005076142,
"calib/step_q_w_n": 1182.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 664.0,
"completions/max_terminated_length": 664.0,
"completions/mean_length": 235.07421875,
"completions/mean_terminated_length": 235.99609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.0037468273658305407,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0824,
"num_tokens": 27403353.0,
"reward": 0.9922232627868652,
"reward_std": 0.022215476259589195,
"rewards/accuracy_reward_step": 0.13671875,
"rewards/final_brier_reward_step": 0.8555401563644409,
"rewards/format_reward_step": 0.9921875,
"step": 128
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.896693766117096,
"aux_distill/mean_u": 0.3474992860126997,
"aux_distill/n_active_tok": 174.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5337232264924806,
"calib/avg_num_step_conf": 5.6015625,
"calib/ece": 0.112483203125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006555217985720797,
"calib/mean_conf": 0.0007980468749999999,
"calib/mu_c": 0.0013793103448275863,
"calib/mu_w": 0.0007237885462555066,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0028335662596457377,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33808641975308645,
"calib/step_q_c_n": 162.0,
"calib/step_q_gap": 0.025081702771954373,
"calib/step_q_w": 0.3130047169811321,
"calib/step_q_w_n": 1272.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 875.0,
"completions/max_terminated_length": 875.0,
"completions/mean_length": 232.25390625,
"completions/mean_terminated_length": 233.1647186279297,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.1376,
"grad_norm": 0.003721039043739438,
"learning_rate": 1.9722222222222224e-06,
"loss": 0.0916,
"num_tokens": 27589002.0,
"reward": 1.0001518726348877,
"reward_std": 0.00037261395482346416,
"rewards/accuracy_reward_step": 0.11328125,
"rewards/final_brier_reward_step": 0.8870225548744202,
"rewards/format_reward_step": 1.0,
"step": 129
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.866528145968914,
"aux_distill/mean_u": 0.3317948903451359,
"aux_distill/n_active_tok": 172.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5101449275362319,
"calib/avg_num_step_conf": 5.41796875,
"calib/ece": 0.17891796875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00012194616977225675,
"calib/mean_conf": 0.00076953125,
"calib/mu_c": 0.0008695652173913044,
"calib/mu_w": 0.0007476190476190476,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0030602047162360617,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3190350877192983,
"calib/step_q_c_n": 228.0,
"calib/step_q_gap": -0.0020002876042564677,
"calib/step_q_w": 0.32103537532355475,
"calib/step_q_w_n": 1159.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 554.0,
"completions/max_terminated_length": 554.0,
"completions/mean_length": 229.515625,
"completions/mean_terminated_length": 230.4156951904297,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.0038857655599713326,
"learning_rate": 1.944444444444445e-06,
"loss": 0.0782,
"num_tokens": 27776854.0,
"reward": 1.00015127658844,
"reward_std": 0.00044980537495575845,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.8206150531768799,
"rewards/format_reward_step": 1.0,
"step": 130
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8693040069192648,
"aux_distill/mean_u": 0.31701613274923535,
"aux_distill/n_active_tok": 191.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5135494987468672,
"calib/avg_num_step_conf": 5.98046875,
"calib/ece": 0.10850390625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006259398496240602,
"calib/mean_conf": 0.00087109375,
"calib/mu_c": 0.0014285714285714286,
"calib/mu_w": 0.0008026315789473684,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.002941318059766903,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.32202797202797206,
"calib/step_q_c_n": 143.0,
"calib/step_q_gap": -0.006143737866360299,
"calib/step_q_w": 0.32817170989433236,
"calib/step_q_w_n": 1388.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 578.0,
"completions/max_terminated_length": 578.0,
"completions/mean_length": 233.84765625,
"completions/mean_terminated_length": 234.7647247314453,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.004119996912777424,
"learning_rate": 1.916666666666667e-06,
"loss": 0.0873,
"num_tokens": 27966735.0,
"reward": 0.9962453246116638,
"reward_std": 0.011495009064674377,
"rewards/accuracy_reward_step": 0.109375,
"rewards/final_brier_reward_step": 0.8870218396186829,
"rewards/format_reward_step": 0.99609375,
"step": 131
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8582376260310411,
"aux_distill/mean_u": 0.368528070301397,
"aux_distill/n_active_tok": 165.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49205236091631605,
"calib/avg_num_step_conf": 5.1875,
"calib/ece": 0.2699607843137255,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00026414212248714356,
"calib/mean_conf": 0.0006274509803921569,
"calib/mu_c": 0.00043478260869565214,
"calib/mu_w": 0.0006989247311827957,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0028694522556747793,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29436666666666667,
"calib/step_q_c_n": 300.0,
"calib/step_q_gap": -0.045698508430609575,
"calib/step_q_w": 0.34006517509727624,
"calib/step_q_w_n": 1028.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 546.0,
"completions/max_terminated_length": 546.0,
"completions/mean_length": 222.0234375,
"completions/mean_terminated_length": 222.89413452148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.1408,
"grad_norm": 0.004884951747953892,
"learning_rate": 1.888888888888889e-06,
"loss": 0.0881,
"num_tokens": 28152973.0,
"reward": 0.9962066411972046,
"reward_std": 0.011387603357434273,
"rewards/accuracy_reward_step": 0.26953125,
"rewards/final_brier_reward_step": 0.7267882823944092,
"rewards/format_reward_step": 0.99609375,
"step": 132
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8835033662617207,
"aux_distill/mean_u": 0.35925395969114193,
"aux_distill/n_active_tok": 184.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5800740740740742,
"calib/avg_num_step_conf": 5.84765625,
"calib/ece": 0.11666666666666667,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0015333333333333336,
"calib/mean_conf": 0.000980392156862745,
"calib/mu_c": 0.0023333333333333335,
"calib/mu_w": 0.0007999999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.003226667047956739,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3530057803468208,
"calib/step_q_c_n": 173.0,
"calib/step_q_gap": 0.008497321132319324,
"calib/step_q_w": 0.34450845921450146,
"calib/step_q_w_n": 1324.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 559.0,
"completions/max_terminated_length": 559.0,
"completions/mean_length": 238.609375,
"completions/mean_terminated_length": 239.54510498046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.00523751974105835,
"learning_rate": 1.8611111111111113e-06,
"loss": 0.0948,
"num_tokens": 28344209.0,
"reward": 0.992455244064331,
"reward_std": 0.022799167782068253,
"rewards/accuracy_reward_step": 0.1171875,
"rewards/final_brier_reward_step": 0.8755354881286621,
"rewards/format_reward_step": 0.9921875,
"step": 133
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8557330705225468,
"aux_distill/mean_u": 0.3506175068699858,
"aux_distill/n_active_tok": 211.25,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49714787187362874,
"calib/avg_num_step_conf": 6.6015625,
"calib/ece": 0.16788235294117648,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -5.704256252742426e-05,
"calib/mean_conf": 0.0007450980392156863,
"calib/mu_c": 0.0006976744186046512,
"calib/mu_w": 0.0007547169811320754,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0026259873008287384,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32928464419475656,
"calib/step_q_c_n": 267.0,
"calib/step_q_gap": 0.025691109409092472,
"calib/step_q_w": 0.3035935347856641,
"calib/step_q_w_n": 1423.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2884.0,
"completions/max_terminated_length": 2884.0,
"completions/mean_length": 266.01171875,
"completions/mean_terminated_length": 266.01171875,
"completions/min_length": 83.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.003346212673932314,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.1081,
"num_tokens": 28545068.0,
"reward": 0.9962071776390076,
"reward_std": 0.011385188437998295,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.8283519744873047,
"rewards/format_reward_step": 0.99609375,
"step": 134
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9089143630117178,
"aux_distill/mean_u": 0.32230048849550913,
"aux_distill/n_active_tok": 173.875,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5025172231054584,
"calib/avg_num_step_conf": 5.43359375,
"calib/ece": 0.13230078125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 8.823529411764721e-05,
"calib/mean_conf": 0.00051171875,
"calib/mu_c": 0.0005882352941176471,
"calib/mu_w": 0.0004999999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0021954897337264955,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29726775956284146,
"calib/step_q_c_n": 183.0,
"calib/step_q_gap": -0.004379591430536012,
"calib/step_q_w": 0.30164735099337747,
"calib/step_q_w_n": 1208.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 719.0,
"completions/max_terminated_length": 719.0,
"completions/mean_length": 240.73828125,
"completions/mean_terminated_length": 241.682373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.144,
"grad_norm": 0.004149943124502897,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.0905,
"num_tokens": 28736385.0,
"reward": 0.9961693286895752,
"reward_std": 0.011272847652435303,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.8634324073791504,
"rewards/format_reward_step": 0.99609375,
"step": 135
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8809623774141073,
"aux_distill/mean_u": 0.3241061942736305,
"aux_distill/n_active_tok": 183.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5138349514563106,
"calib/avg_num_step_conf": 5.75,
"calib/ece": 0.1945546875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0003009708737864077,
"calib/mean_conf": 0.0007578125,
"calib/mu_c": 0.001,
"calib/mu_w": 0.0006990291262135923,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.002628705996273404,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30015267175572524,
"calib/step_q_c_n": 262.0,
"calib/step_q_gap": -0.03536790675667145,
"calib/step_q_w": 0.3355205785123967,
"calib/step_q_w_n": 1210.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 537.0,
"completions/max_terminated_length": 537.0,
"completions/mean_length": 235.9609375,
"completions/mean_terminated_length": 236.88629150390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.003267818596214056,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0877,
"num_tokens": 28929087.0,
"reward": 1.000191569328308,
"reward_std": 0.00047984960838221014,
"rewards/accuracy_reward_step": 0.1953125,
"rewards/final_brier_reward_step": 0.8050706386566162,
"rewards/format_reward_step": 1.0,
"step": 136
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8728037420660257,
"aux_distill/mean_u": 0.37050528594314014,
"aux_distill/n_active_tok": 176.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5345982142857143,
"calib/avg_num_step_conf": 5.55078125,
"calib/ece": 0.12390625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0012500000000000002,
"calib/mean_conf": 0.00109375,
"calib/mu_c": 0.0021875,
"calib/mu_w": 0.0009375000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0035869500885153116,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3929787234042554,
"calib/step_q_c_n": 141.0,
"calib/step_q_gap": 0.051128723404255394,
"calib/step_q_w": 0.34185,
"calib/step_q_w_n": 1280.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 791.0,
"completions/max_terminated_length": 791.0,
"completions/mean_length": 237.84765625,
"completions/mean_terminated_length": 238.78041076660156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.004397119395434856,
"learning_rate": 1.75e-06,
"loss": 0.0846,
"num_tokens": 29120768.0,
"reward": 0.996360182762146,
"reward_std": 0.011717451736330986,
"rewards/accuracy_reward_step": 0.125,
"rewards/final_brier_reward_step": 0.8716264963150024,
"rewards/format_reward_step": 0.99609375,
"step": 137
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8349598366767168,
"aux_distill/mean_u": 0.2957780477687902,
"aux_distill/n_active_tok": 177.125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4908908574293832,
"calib/avg_num_step_conf": 5.53515625,
"calib/ece": 0.24215686274509804,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00022981781714858762,
"calib/mean_conf": 0.000980392156862745,
"calib/mu_c": 0.0008064516129032258,
"calib/mu_w": 0.0010362694300518134,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.003102751442329801,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3777852348993288,
"calib/step_q_c_n": 298.0,
"calib/step_q_gap": 0.015041445801920406,
"calib/step_q_w": 0.3627437890974084,
"calib/step_q_w_n": 1119.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 519.0,
"completions/max_terminated_length": 519.0,
"completions/mean_length": 222.1328125,
"completions/mean_terminated_length": 223.00393676757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.1472,
"grad_norm": 0.004196003545075655,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0751,
"num_tokens": 29305778.0,
"reward": 0.996283769607544,
"reward_std": 0.011531622149050236,
"rewards/accuracy_reward_step": 0.2421875,
"rewards/final_brier_reward_step": 0.7542863488197327,
"rewards/format_reward_step": 0.99609375,
"step": 138
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8856425620615482,
"aux_distill/mean_u": 0.3184689657209526,
"aux_distill/n_active_tok": 159.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5523317435082141,
"calib/avg_num_step_conf": 5.015625,
"calib/ece": 0.1313671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010466348701642816,
"calib/mean_conf": 0.0014453125,
"calib/mu_c": 0.002352941176470588,
"calib/mu_w": 0.0013063063063063064,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0035162759813961914,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4230817610062893,
"calib/step_q_c_n": 159.0,
"calib/step_q_gap": 0.05747598322851155,
"calib/step_q_w": 0.36560577777777775,
"calib/step_q_w_n": 1125.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 549.0,
"completions/max_terminated_length": 549.0,
"completions/mean_length": 211.58203125,
"completions/mean_terminated_length": 212.41177368164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 65.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.004312260076403618,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0947,
"num_tokens": 29486847.0,
"reward": 1.0003052949905396,
"reward_std": 0.0006617589388042688,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.8677979707717896,
"rewards/format_reward_step": 1.0,
"step": 139
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8976630344986916,
"aux_distill/mean_u": 0.3117378416124111,
"aux_distill/n_active_tok": 159.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4712398585396297,
"calib/avg_num_step_conf": 5.0078125,
"calib/ece": 0.17905098039215686,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0003101726648637403,
"calib/mean_conf": 0.0013411764705882354,
"calib/mu_c": 0.0010869565217391304,
"calib/mu_w": 0.0013971291866028707,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.003510945153490569,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.2988267326732673,
"calib/step_q_c_n": 202.0,
"calib/step_q_gap": -0.02715474880821417,
"calib/step_q_w": 0.3259814814814815,
"calib/step_q_w_n": 1080.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 488.0,
"completions/max_terminated_length": 488.0,
"completions/mean_length": 210.6484375,
"completions/mean_terminated_length": 211.47451782226562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.005486528854817152,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0883,
"num_tokens": 29669597.0,
"reward": 0.9923757910728455,
"reward_std": 0.022655244916677475,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.8128765821456909,
"rewards/format_reward_step": 0.9921875,
"step": 140
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.851903609931469,
"aux_distill/mean_u": 0.3203507632368131,
"aux_distill/n_active_tok": 169.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5276381909547738,
"calib/avg_num_step_conf": 5.35546875,
"calib/ece": 0.2209765625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0007731640659437539,
"calib/mean_conf": 0.0016796875,
"calib/mu_c": 0.002280701754385965,
"calib/mu_w": 0.0015075376884422112,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004135278092504027,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3629182879377432,
"calib/step_q_c_n": 257.0,
"calib/step_q_gap": 0.04536891630399098,
"calib/step_q_w": 0.31754937163375224,
"calib/step_q_w_n": 1114.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 565.0,
"completions/max_terminated_length": 565.0,
"completions/mean_length": 225.58203125,
"completions/mean_terminated_length": 226.4666748046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.1504,
"grad_norm": 0.0052869487553834915,
"learning_rate": 1.638888888888889e-06,
"loss": 0.1107,
"num_tokens": 29858250.0,
"reward": 1.000497817993164,
"reward_std": 0.0012639259221032262,
"rewards/accuracy_reward_step": 0.22265625,
"rewards/final_brier_reward_step": 0.7783395051956177,
"rewards/format_reward_step": 1.0,
"step": 141
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8793974351137877,
"aux_distill/mean_u": 0.32945197865296405,
"aux_distill/n_active_tok": 163.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5704353772509688,
"calib/avg_num_step_conf": 5.171875,
"calib/ece": 0.15894117647058825,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0015819466605881012,
"calib/mean_conf": 0.001843137254901961,
"calib/mu_c": 0.0031707317073170734,
"calib/mu_w": 0.0015887850467289721,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004074658888740561,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3137755102040816,
"calib/step_q_c_n": 196.0,
"calib/step_q_gap": -0.01651704298740775,
"calib/step_q_w": 0.33029255319148937,
"calib/step_q_w_n": 1128.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 682.0,
"completions/max_terminated_length": 682.0,
"completions/mean_length": 211.09765625,
"completions/mean_terminated_length": 211.92550659179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.0058931345120072365,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.0805,
"num_tokens": 30041259.0,
"reward": 0.9965915679931641,
"reward_std": 0.012145813554525375,
"rewards/accuracy_reward_step": 0.16015625,
"rewards/final_brier_reward_step": 0.8369331955909729,
"rewards/format_reward_step": 0.99609375,
"step": 142
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8862555511295795,
"aux_distill/mean_u": 0.3019375839277697,
"aux_distill/n_active_tok": 156.75,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5765437392795882,
"calib/avg_num_step_conf": 4.96484375,
"calib/ece": 0.1699453125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.001512006861063465,
"calib/mean_conf": 0.0019296875,
"calib/mu_c": 0.003181818181818182,
"calib/mu_w": 0.0016698113207547168,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004032453490412971,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.31093750000000003,
"calib/step_q_c_n": 192.0,
"calib/step_q_gap": 0.005059835495829501,
"calib/step_q_w": 0.30587766450417053,
"calib/step_q_w_n": 1079.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 719.0,
"completions/max_terminated_length": 719.0,
"completions/mean_length": 215.78515625,
"completions/mean_terminated_length": 216.63137817382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.00680437870323658,
"learning_rate": 1.5833333333333333e-06,
"loss": 0.0785,
"num_tokens": 30227644.0,
"reward": 0.9966306686401367,
"reward_std": 0.01232366356998682,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8252924680709839,
"rewards/format_reward_step": 0.99609375,
"step": 143
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8676097318530083,
"aux_distill/mean_u": 0.2938534331536071,
"aux_distill/n_active_tok": 159.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4773992793973141,
"calib/avg_num_step_conf": 5.015625,
"calib/ece": 0.1660546875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00034392400917130717,
"calib/mean_conf": 0.0019140625,
"calib/mu_c": 0.0016279069767441861,
"calib/mu_w": 0.0019718309859154933,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004221461802041296,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32804123711340205,
"calib/step_q_c_n": 194.0,
"calib/step_q_gap": 0.02995866830606264,
"calib/step_q_w": 0.2980825688073394,
"calib/step_q_w_n": 1090.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 560.0,
"completions/max_terminated_length": 560.0,
"completions/mean_length": 206.1015625,
"completions/mean_terminated_length": 206.90982055664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 55.0,
"epoch": 0.1536,
"grad_norm": 0.004363170359283686,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.0915,
"num_tokens": 30408342.0,
"reward": 1.00026273727417,
"reward_std": 0.0006111764814704657,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.8325566053390503,
"rewards/format_reward_step": 1.0,
"step": 144
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.871504008769989,
"aux_distill/mean_u": 0.30711970518687853,
"aux_distill/n_active_tok": 161.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5699991183990125,
"calib/avg_num_step_conf": 5.12109375,
"calib/ece": 0.21972265625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0014169972670369391,
"calib/mean_conf": 0.0029335937500000004,
"calib/mu_c": 0.004035087719298246,
"calib/mu_w": 0.0026180904522613065,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005038085842853508,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3683471074380165,
"calib/step_q_c_n": 242.0,
"calib/step_q_gap": 0.05525169116112222,
"calib/step_q_w": 0.3130954162768943,
"calib/step_q_w_n": 1069.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 739.0,
"completions/max_terminated_length": 739.0,
"completions/mean_length": 208.78515625,
"completions/mean_terminated_length": 209.6039276123047,
"completions/min_length": 0.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.006876181345432997,
"learning_rate": 1.527777777777778e-06,
"loss": 0.0934,
"num_tokens": 30588303.0,
"reward": 1.0008814334869385,
"reward_std": 0.0017051099566742778,
"rewards/accuracy_reward_step": 0.22265625,
"rewards/final_brier_reward_step": 0.779106616973877,
"rewards/format_reward_step": 1.0,
"step": 145
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9115830920636654,
"aux_distill/mean_u": 0.3254450969587543,
"aux_distill/n_active_tok": 171.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5464257964257965,
"calib/avg_num_step_conf": 5.3828125,
"calib/ece": 0.08310546875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008799533799533797,
"calib/mean_conf": 0.00283203125,
"calib/mu_c": 0.0036363636363636364,
"calib/mu_w": 0.0027564102564102567,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004744971575154422,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.31356435643564357,
"calib/step_q_c_n": 101.0,
"calib/step_q_gap": -0.028725228529117575,
"calib/step_q_w": 0.34228958496476114,
"calib/step_q_w_n": 1277.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 685.0,
"completions/max_terminated_length": 685.0,
"completions/mean_length": 219.73828125,
"completions/mean_terminated_length": 220.60000610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.0055837687104940414,
"learning_rate": 1.5e-06,
"loss": 0.0813,
"num_tokens": 30775580.0,
"reward": 1.0002973079681396,
"reward_std": 0.0008225127821788192,
"rewards/accuracy_reward_step": 0.0859375,
"rewards/final_brier_reward_step": 0.914656937122345,
"rewards/format_reward_step": 1.0,
"step": 146
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8444254854694009,
"aux_distill/mean_u": 0.2973344318918232,
"aux_distill/n_active_tok": 157.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5471390632544664,
"calib/avg_num_step_conf": 4.9453125,
"calib/ece": 0.14556640625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0012638821825205226,
"calib/mean_conf": 0.0028710937500000004,
"calib/mu_c": 0.003947368421052633,
"calib/mu_w": 0.00268348623853211,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00488730262299266,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.34029761904761907,
"calib/step_q_c_n": 168.0,
"calib/step_q_gap": 0.017043520686963354,
"calib/step_q_w": 0.3232540983606557,
"calib/step_q_w_n": 1098.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 441.0,
"completions/max_terminated_length": 441.0,
"completions/mean_length": 202.6171875,
"completions/mean_terminated_length": 203.41177368164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.1568,
"grad_norm": 0.006587905343621969,
"learning_rate": 1.4722222222222225e-06,
"loss": 0.0974,
"num_tokens": 30954938.0,
"reward": 1.0005698204040527,
"reward_std": 0.0012822604039683938,
"rewards/accuracy_reward_step": 0.1484375,
"rewards/final_brier_reward_step": 0.8527022004127502,
"rewards/format_reward_step": 1.0,
"step": 147
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.865119538269937,
"aux_distill/mean_u": 0.32240494753424903,
"aux_distill/n_active_tok": 151.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5549730224982184,
"calib/avg_num_step_conf": 4.80078125,
"calib/ece": 0.18,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0013315687671790694,
"calib/mean_conf": 0.00359375,
"calib/mu_c": 0.004680851063829788,
"calib/mu_w": 0.003349282296650718,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005113458803735491,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3392574257425743,
"calib/step_q_c_n": 202.0,
"calib/step_q_gap": 0.042695595168085465,
"calib/step_q_w": 0.2965618305744888,
"calib/step_q_w_n": 1027.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 502.0,
"completions/max_terminated_length": 502.0,
"completions/mean_length": 190.22265625,
"completions/mean_terminated_length": 190.96864318847656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.006722502876073122,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0788,
"num_tokens": 31132555.0,
"reward": 1.0008398294448853,
"reward_std": 0.001604650286026299,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.8180859684944153,
"rewards/format_reward_step": 1.0,
"step": 148
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8780522029846907,
"aux_distill/mean_u": 0.2941598656144137,
"aux_distill/n_active_tok": 151.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5526471458148476,
"calib/avg_num_step_conf": 4.78515625,
"calib/ece": 0.1875390625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0007699891550823237,
"calib/mean_conf": 0.0038671875,
"calib/mu_c": 0.004489795918367348,
"calib/mu_w": 0.003719806763285024,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.005752541685189579,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.32889830508474577,
"calib/step_q_c_n": 236.0,
"calib/step_q_gap": 0.0374008328906103,
"calib/step_q_w": 0.29149747219413547,
"calib/step_q_w_n": 989.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 625.0,
"completions/max_terminated_length": 625.0,
"completions/mean_length": 202.828125,
"completions/mean_terminated_length": 203.62353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.0074161868542432785,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.0712,
"num_tokens": 31312743.0,
"reward": 0.9969292879104614,
"reward_std": 0.012576460838317871,
"rewards/accuracy_reward_step": 0.19140625,
"rewards/final_brier_reward_step": 0.8063585758209229,
"rewards/format_reward_step": 0.99609375,
"step": 149
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9232179820537567,
"aux_distill/mean_u": 0.33005514047016793,
"aux_distill/n_active_tok": 138.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5525841346153846,
"calib/avg_num_step_conf": 4.453125,
"calib/ece": 0.183,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011282051282051281,
"calib/mean_conf": 0.0045000000000000005,
"calib/mu_c": 0.005416666666666667,
"calib/mu_w": 0.004288461538461539,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.005700877125495689,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.27097142857142853,
"calib/step_q_c_n": 175.0,
"calib/step_q_gap": -0.00786691339748341,
"calib/step_q_w": 0.27883834196891194,
"calib/step_q_w_n": 965.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 475.0,
"completions/max_terminated_length": 475.0,
"completions/mean_length": 178.2421875,
"completions/mean_terminated_length": 178.94119262695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.16,
"grad_norm": 0.007917214184999466,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.1157,
"num_tokens": 31487141.0,
"reward": 0.9970831871032715,
"reward_std": 0.012835890986025333,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.810572624206543,
"rewards/format_reward_step": 0.99609375,
"step": 150
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9045738726854324,
"aux_distill/mean_u": 0.34284381420736626,
"aux_distill/n_active_tok": 151.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5800421274354923,
"calib/avg_num_step_conf": 4.7421875,
"calib/ece": 0.17123046875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002027909426013692,
"calib/mean_conf": 0.00455078125,
"calib/mu_c": 0.006222222222222223,
"calib/mu_w": 0.004194312796208531,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0056330982828855765,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24280701754385964,
"calib/step_q_c_n": 228.0,
"calib/step_q_gap": -0.052558094018006485,
"calib/step_q_w": 0.2953651115618661,
"calib/step_q_w_n": 986.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 484.0,
"completions/max_terminated_length": 484.0,
"completions/mean_length": 197.1328125,
"completions/mean_terminated_length": 197.90589904785156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.0069051096215844154,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0868,
"num_tokens": 31668439.0,
"reward": 1.0010675191879272,
"reward_std": 0.0018596667796373367,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.8263537883758545,
"rewards/format_reward_step": 1.0,
"step": 151
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8891061600297689,
"aux_distill/mean_u": 0.2850408530933203,
"aux_distill/n_active_tok": 145.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5039297658862876,
"calib/avg_num_step_conf": 4.55078125,
"calib/ece": 0.0962109375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 3.678929765886345e-05,
"calib/mean_conf": 0.0053515625,
"calib/mu_c": 0.005384615384615385,
"calib/mu_w": 0.005347826086956522,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005852469889593089,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2187155963302752,
"calib/step_q_c_n": 109.0,
"calib/step_q_gap": -0.051278721851542974,
"calib/step_q_w": 0.2699943181818182,
"calib/step_q_w_n": 1056.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 458.0,
"completions/max_terminated_length": 458.0,
"completions/mean_length": 196.66796875,
"completions/mean_terminated_length": 197.43922424316406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.0065941475331783295,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0724,
"num_tokens": 31847986.0,
"reward": 1.0005154609680176,
"reward_std": 0.0011244446504861116,
"rewards/accuracy_reward_step": 0.1015625,
"rewards/final_brier_reward_step": 0.8994683623313904,
"rewards/format_reward_step": 1.0,
"step": 152
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9007142148911953,
"aux_distill/mean_u": 0.36622527358126683,
"aux_distill/n_active_tok": 142.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.41713121783876494,
"calib/avg_num_step_conf": 4.5546875,
"calib/ece": 0.1669921875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0017795883361921105,
"calib/mean_conf": 0.0048828125,
"calib/mu_c": 0.003409090909090909,
"calib/mu_w": 0.0051886792452830195,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005658402344287983,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.26424731182795697,
"calib/step_q_c_n": 186.0,
"calib/step_q_gap": -0.027946565723063477,
"calib/step_q_w": 0.29219387755102044,
"calib/step_q_w_n": 980.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 516.0,
"completions/max_terminated_length": 516.0,
"completions/mean_length": 193.578125,
"completions/mean_terminated_length": 194.33726501464844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.1632,
"grad_norm": 0.007551599759608507,
"learning_rate": 1.3055555555555556e-06,
"loss": 0.0752,
"num_tokens": 32028670.0,
"reward": 1.0005578994750977,
"reward_std": 0.0011114366352558136,
"rewards/accuracy_reward_step": 0.171875,
"rewards/final_brier_reward_step": 0.8292410373687744,
"rewards/format_reward_step": 1.0,
"step": 153
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8881961200386286,
"aux_distill/mean_u": 0.3052273858621972,
"aux_distill/n_active_tok": 140.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5098214285714286,
"calib/avg_num_step_conf": 4.390625,
"calib/ece": 0.21355078125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00020214285714285733,
"calib/mean_conf": 0.00519921875,
"calib/mu_c": 0.005357142857142858,
"calib/mu_w": 0.005155000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005728505969242629,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32344000000000006,
"calib/step_q_c_n": 250.0,
"calib/step_q_gap": 0.01546334096109847,
"calib/step_q_w": 0.3079766590389016,
"calib/step_q_w_n": 874.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 490.0,
"completions/max_terminated_length": 490.0,
"completions/mean_length": 187.39453125,
"completions/mean_terminated_length": 188.12942504882812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 61.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.008079719729721546,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.089,
"num_tokens": 32204891.0,
"reward": 1.0011417865753174,
"reward_std": 0.0018572057597339153,
"rewards/accuracy_reward_step": 0.21875,
"rewards/final_brier_reward_step": 0.7835339307785034,
"rewards/format_reward_step": 1.0,
"step": 154
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8976440113037825,
"aux_distill/mean_u": 0.2993254863342828,
"aux_distill/n_active_tok": 132.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5273582405935346,
"calib/avg_num_step_conf": 4.16015625,
"calib/ece": 0.1275390625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0003630100688924217,
"calib/mean_conf": 0.0052734374999999995,
"calib/mu_c": 0.005588235294117647,
"calib/mu_w": 0.005225225225225226,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006244015689729948,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2894736842105263,
"calib/step_q_c_n": 152.0,
"calib/step_q_gap": 0.011381679829365332,
"calib/step_q_w": 0.278092004381161,
"calib/step_q_w_n": 913.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 438.0,
"completions/max_terminated_length": 438.0,
"completions/mean_length": 176.953125,
"completions/mean_terminated_length": 177.64706420898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.008248928934335709,
"learning_rate": 1.25e-06,
"loss": 0.0824,
"num_tokens": 32381215.0,
"reward": 1.000708818435669,
"reward_std": 0.0015589774120599031,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.8686050176620483,
"rewards/format_reward_step": 1.0,
"step": 155
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.890786474570632,
"aux_distill/mean_u": 0.2993873599101748,
"aux_distill/n_active_tok": 143.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.42251082251082245,
"calib/avg_num_step_conf": 4.48046875,
"calib/ece": 0.0920703125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0017575757575757581,
"calib/mean_conf": 0.0055859375,
"calib/mu_c": 0.004,
"calib/mu_w": 0.005757575757575758,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005559377415331123,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29452991452991456,
"calib/step_q_c_n": 117.0,
"calib/step_q_gap": 0.0008988465687494451,
"calib/step_q_w": 0.2936310679611651,
"calib/step_q_w_n": 1030.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 457.0,
"completions/max_terminated_length": 457.0,
"completions/mean_length": 185.2421875,
"completions/mean_terminated_length": 185.96864318847656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.1664,
"grad_norm": 0.0061567374505102634,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.1028,
"num_tokens": 32557205.0,
"reward": 1.0003595352172852,
"reward_std": 0.0009770547039806843,
"rewards/accuracy_reward_step": 0.09765625,
"rewards/final_brier_reward_step": 0.9030628800392151,
"rewards/format_reward_step": 1.0,
"step": 156
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8980565015226603,
"aux_distill/mean_u": 0.3480745046670132,
"aux_distill/n_active_tok": 137.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5370686848958333,
"calib/avg_num_step_conf": 4.328125,
"calib/ece": 0.24380859375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0009114583333333344,
"calib/mean_conf": 0.00619140625,
"calib/mu_c": 0.006875000000000001,
"calib/mu_w": 0.0059635416666666665,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.005728090423296488,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.260632911392405,
"calib/step_q_c_n": 237.0,
"calib/step_q_gap": -0.022214390559374575,
"calib/step_q_w": 0.2828473019517796,
"calib/step_q_w_n": 871.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 570.0,
"completions/max_terminated_length": 570.0,
"completions/mean_length": 177.19140625,
"completions/mean_terminated_length": 177.88629150390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.007363366428762674,
"learning_rate": 1.1944444444444446e-06,
"loss": 0.0966,
"num_tokens": 32730102.0,
"reward": 0.9977768659591675,
"reward_std": 0.013398206792771816,
"rewards/accuracy_reward_step": 0.25,
"rewards/final_brier_reward_step": 0.7494601011276245,
"rewards/format_reward_step": 0.99609375,
"step": 157
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9132845029234886,
"aux_distill/mean_u": 0.2984792581929277,
"aux_distill/n_active_tok": 133.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4822831489498156,
"calib/avg_num_step_conf": 4.21484375,
"calib/ece": 0.2521568627450981,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0004088504088504089,
"calib/mean_conf": 0.006666666666666667,
"calib/mu_c": 0.006363636363636364,
"calib/mu_w": 0.006772486772486773,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00596284793999944,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3271505791505792,
"calib/step_q_c_n": 259.0,
"calib/step_q_gap": -0.0033006403616159252,
"calib/step_q_w": 0.33045121951219514,
"calib/step_q_w_n": 820.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 483.0,
"completions/max_terminated_length": 483.0,
"completions/mean_length": 173.625,
"completions/mean_terminated_length": 174.30589294433594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 47.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.00829799473285675,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0762,
"num_tokens": 32903598.0,
"reward": 0.9976944923400879,
"reward_std": 0.013237364590168,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/final_brier_reward_step": 0.7414828538894653,
"rewards/format_reward_step": 0.99609375,
"step": 158
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8752852268517017,
"aux_distill/mean_u": 0.3136494702197504,
"aux_distill/n_active_tok": 139.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4699352911896466,
"calib/avg_num_step_conf": 4.34765625,
"calib/ece": 0.18618110236220473,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0007555998008959685,
"calib/mean_conf": 0.006732283464566929,
"calib/mu_c": 0.006122448979591837,
"calib/mu_w": 0.006878048780487806,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006207626524627117,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3257842105263158,
"calib/step_q_c_n": 190.0,
"calib/step_q_gap": 0.0023574499629355206,
"calib/step_q_w": 0.32342676056338027,
"calib/step_q_w_n": 923.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2594.0,
"completions/max_terminated_length": 2594.0,
"completions/mean_length": 184.484375,
"completions/mean_terminated_length": 184.484375,
"completions/min_length": 51.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.1696,
"grad_norm": 0.008591427467763424,
"learning_rate": 1.138888888888889e-06,
"loss": 0.1353,
"num_tokens": 33079418.0,
"reward": 0.993317723274231,
"reward_std": 0.024074668064713478,
"rewards/accuracy_reward_step": 0.19140625,
"rewards/final_brier_reward_step": 0.8030418157577515,
"rewards/format_reward_step": 0.9921875,
"step": 159
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8801496736705303,
"aux_distill/mean_u": 0.31119047924721255,
"aux_distill/n_active_tok": 137.5,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4530465949820789,
"calib/avg_num_step_conf": 4.30859375,
"calib/ece": 0.113671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0011039426523297485,
"calib/mean_conf": 0.007421875,
"calib/mu_c": 0.0064516129032258064,
"calib/mu_w": 0.007555555555555555,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006026775380282146,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.30671641791044774,
"calib/step_q_c_n": 134.0,
"calib/step_q_gap": -0.014521972182431564,
"calib/step_q_w": 0.3212383900928793,
"calib/step_q_w_n": 969.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 557.0,
"completions/max_terminated_length": 557.0,
"completions/mean_length": 171.9765625,
"completions/mean_terminated_length": 172.65098571777344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.007910117506980896,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0578,
"num_tokens": 33252092.0,
"reward": 0.9968292713165283,
"reward_std": 0.012476676143705845,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.8764711022377014,
"rewards/format_reward_step": 0.99609375,
"step": 160
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8750440068542957,
"aux_distill/mean_u": 0.29593528663277124,
"aux_distill/n_active_tok": 130.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.53564453125,
"calib/avg_num_step_conf": 4.17578125,
"calib/ece": 0.2423984375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.000697916666666667,
"calib/mean_conf": 0.0076015625,
"calib/mu_c": 0.008125,
"calib/mu_w": 0.007427083333333333,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.008348689870787735,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29059090909090907,
"calib/step_q_c_n": 220.0,
"calib/step_q_gap": -0.03642558089731235,
"calib/step_q_w": 0.3270164899882214,
"calib/step_q_w_n": 849.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 451.0,
"completions/max_terminated_length": 451.0,
"completions/mean_length": 165.85546875,
"completions/mean_terminated_length": 166.50588989257812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.009085629135370255,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.1112,
"num_tokens": 33422279.0,
"reward": 1.001967430114746,
"reward_std": 0.002947971224784851,
"rewards/accuracy_reward_step": 0.25,
"rewards/final_brier_reward_step": 0.7539349794387817,
"rewards/format_reward_step": 1.0,
"step": 161
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8783328235149384,
"aux_distill/mean_u": 0.2781622598684642,
"aux_distill/n_active_tok": 132.625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5306853815141108,
"calib/avg_num_step_conf": 4.14453125,
"calib/ece": 0.28325490196078434,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0006921009407197254,
"calib/mean_conf": 0.006941176470588235,
"calib/mu_c": 0.007432432432432433,
"calib/mu_w": 0.0067403314917127075,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005744261466227626,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.346728187919463,
"calib/step_q_c_n": 298.0,
"calib/step_q_gap": 0.05592871216585882,
"calib/step_q_w": 0.2907994757536042,
"calib/step_q_w_n": 763.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1928.0,
"completions/max_terminated_length": 1928.0,
"completions/mean_length": 179.453125,
"completions/mean_terminated_length": 179.453125,
"completions/min_length": 46.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.1728,
"grad_norm": 0.009630361571907997,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.1369,
"num_tokens": 33596171.0,
"reward": 0.9982017278671265,
"reward_std": 0.014269420877099037,
"rewards/accuracy_reward_step": 0.2890625,
"rewards/final_brier_reward_step": 0.7112472653388977,
"rewards/format_reward_step": 0.99609375,
"step": 162
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8860432486981153,
"aux_distill/mean_u": 0.33796958895590445,
"aux_distill/n_active_tok": 136.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49386574074074074,
"calib/avg_num_step_conf": 4.36328125,
"calib/ece": 0.1492578125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002870370370370353,
"calib/mean_conf": 0.0069921875,
"calib/mu_c": 0.006750000000000001,
"calib/mu_w": 0.007037037037037036,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006182126573020302,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2718055555555556,
"calib/step_q_c_n": 144.0,
"calib/step_q_gap": -0.03968468082676718,
"calib/step_q_w": 0.31149023638232276,
"calib/step_q_w_n": 973.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 600.0,
"completions/max_terminated_length": 600.0,
"completions/mean_length": 170.16796875,
"completions/mean_terminated_length": 170.83529663085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 58.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.007554793730378151,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.0762,
"num_tokens": 33768374.0,
"reward": 1.0010111331939697,
"reward_std": 0.0017468599835410714,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.8457722663879395,
"rewards/format_reward_step": 1.0,
"step": 163
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8970063086599112,
"aux_distill/mean_u": 0.29121355008947214,
"aux_distill/n_active_tok": 142.875,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5376078132927448,
"calib/avg_num_step_conf": 4.46484375,
"calib/ece": 0.13416862745098038,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0005730593607305946,
"calib/mean_conf": 0.0070078431372549025,
"calib/mu_c": 0.007500000000000001,
"calib/mu_w": 0.006926940639269406,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005982321771629778,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2468181818181818,
"calib/step_q_c_n": 154.0,
"calib/step_q_gap": -0.08941033183196986,
"calib/step_q_w": 0.33622851365015166,
"calib/step_q_w_n": 989.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 620.0,
"completions/max_terminated_length": 620.0,
"completions/mean_length": 179.66796875,
"completions/mean_terminated_length": 180.37255859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.009981459937989712,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0684,
"num_tokens": 33944313.0,
"reward": 0.9971061944961548,
"reward_std": 0.012966196052730083,
"rewards/accuracy_reward_step": 0.140625,
"rewards/final_brier_reward_step": 0.8574935793876648,
"rewards/format_reward_step": 0.99609375,
"step": 164
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9019099716097116,
"aux_distill/mean_u": 0.33034416956193996,
"aux_distill/n_active_tok": 141.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5552083333333333,
"calib/avg_num_step_conf": 4.4375,
"calib/ece": 0.1494140625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010833333333333337,
"calib/mean_conf": 0.006835937499999999,
"calib/mu_c": 0.00775,
"calib/mu_w": 0.006666666666666666,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005497552500530919,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2841301775147929,
"calib/step_q_c_n": 169.0,
"calib/step_q_gap": -0.03139205619772001,
"calib/step_q_w": 0.3155222337125129,
"calib/step_q_w_n": 967.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 477.0,
"completions/max_terminated_length": 477.0,
"completions/mean_length": 178.21484375,
"completions/mean_terminated_length": 178.9137420654297,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.176,
"grad_norm": 0.008244171738624573,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0962,
"num_tokens": 34119320.0,
"reward": 1.0011725425720215,
"reward_std": 0.0019493226427584887,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.8460949063301086,
"rewards/format_reward_step": 1.0,
"step": 165
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9130820780992508,
"aux_distill/mean_u": 0.34838786578038355,
"aux_distill/n_active_tok": 139.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4449762989125383,
"calib/avg_num_step_conf": 4.36328125,
"calib/ece": 0.19944140625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.001243517055488426,
"calib/mean_conf": 0.00758984375,
"calib/mu_c": 0.006603773584905662,
"calib/mu_w": 0.007847290640394088,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005769644971797306,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2882242990654206,
"calib/step_q_c_n": 214.0,
"calib/step_q_gap": -0.02515333105639561,
"calib/step_q_w": 0.3133776301218162,
"calib/step_q_w_n": 903.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 431.0,
"completions/max_terminated_length": 431.0,
"completions/mean_length": 177.10546875,
"completions/mean_terminated_length": 177.8000030517578,
"completions/min_length": 0.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.00669672479853034,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0847,
"num_tokens": 34294651.0,
"reward": 1.001321792602539,
"reward_std": 0.0016973735764622688,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/final_brier_reward_step": 0.7956122159957886,
"rewards/format_reward_step": 1.0,
"step": 166
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9007718153297901,
"aux_distill/mean_u": 0.34749688302032433,
"aux_distill/n_active_tok": 137.875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5268932038834953,
"calib/avg_num_step_conf": 4.3125,
"calib/ece": 0.188359375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008038834951456311,
"calib/mean_conf": 0.006953124999999999,
"calib/mu_c": 0.0076,
"calib/mu_w": 0.006796116504854369,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006002420572933473,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2773591086828539,
"calib/step_q_c_n": 213.0,
"calib/step_q_gap": 0.005353497010575581,
"calib/step_q_w": 0.2720056116722783,
"calib/step_q_w_n": 891.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 652.0,
"completions/max_terminated_length": 652.0,
"completions/mean_length": 175.8671875,
"completions/mean_terminated_length": 176.55686950683594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.01054567750543356,
"learning_rate": 9.166666666666666e-07,
"loss": 0.0817,
"num_tokens": 34469089.0,
"reward": 0.9936298727989197,
"reward_std": 0.024928880855441093,
"rewards/accuracy_reward_step": 0.1953125,
"rewards/final_brier_reward_step": 0.7997597455978394,
"rewards/format_reward_step": 0.9921875,
"step": 167
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9209919981658459,
"aux_distill/mean_u": 0.3350003984083795,
"aux_distill/n_active_tok": 143.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4944485566247224,
"calib/avg_num_step_conf": 4.49609375,
"calib/ece": 0.23817968750000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 2.9772185212599556e-05,
"calib/mean_conf": 0.0079140625,
"calib/mu_c": 0.007936507936507936,
"calib/mu_w": 0.007906735751295337,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005863722345583371,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3273705179282868,
"calib/step_q_c_n": 251.0,
"calib/step_q_gap": 0.016279406817175712,
"calib/step_q_w": 0.3110911111111111,
"calib/step_q_w_n": 900.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 477.0,
"completions/max_terminated_length": 477.0,
"completions/mean_length": 180.86328125,
"completions/mean_terminated_length": 181.5725555419922,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.1792,
"grad_norm": 0.0089962063357234,
"learning_rate": 8.88888888888889e-07,
"loss": 0.1004,
"num_tokens": 34643870.0,
"reward": 1.0019044876098633,
"reward_std": 0.003158072242513299,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/final_brier_reward_step": 0.7577154636383057,
"rewards/format_reward_step": 1.0,
"step": 168
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8786559756845236,
"aux_distill/mean_u": 0.2959150388126449,
"aux_distill/n_active_tok": 151.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5684840425531914,
"calib/avg_num_step_conf": 4.7578125,
"calib/ece": 0.1767372549019608,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.001719108019639935,
"calib/mean_conf": 0.007576470588235294,
"calib/mu_c": 0.00897872340425532,
"calib/mu_w": 0.007259615384615384,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006041120147894082,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.31331683168316826,
"calib/step_q_c_n": 202.0,
"calib/step_q_gap": 0.01460620176190841,
"calib/step_q_w": 0.29871062992125985,
"calib/step_q_w_n": 1016.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 568.0,
"completions/max_terminated_length": 568.0,
"completions/mean_length": 186.21484375,
"completions/mean_terminated_length": 186.9451141357422,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.009285873733460903,
"learning_rate": 8.611111111111112e-07,
"loss": 0.0847,
"num_tokens": 34819533.0,
"reward": 0.9996484518051147,
"reward_std": 0.00795112457126379,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.815703272819519,
"rewards/format_reward_step": 0.99609375,
"step": 169
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8939372319728136,
"aux_distill/mean_u": 0.3145104684178751,
"aux_distill/n_active_tok": 150.5,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49501424501424496,
"calib/avg_num_step_conf": 4.7109375,
"calib/ece": 0.14479607843137257,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00023183760683760483,
"calib/mean_conf": 0.008145098039215686,
"calib/mu_c": 0.00794871794871795,
"calib/mu_w": 0.008180555555555555,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005175974426607817,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28104651162790695,
"calib/step_q_c_n": 172.0,
"calib/step_q_gap": -0.02617108991948186,
"calib/step_q_w": 0.3072176015473888,
"calib/step_q_w_n": 1034.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 509.0,
"completions/max_terminated_length": 509.0,
"completions/mean_length": 182.0546875,
"completions/mean_terminated_length": 182.7686309814453,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.007290273439139128,
"learning_rate": 8.333333333333333e-07,
"loss": 0.1107,
"num_tokens": 34994099.0,
"reward": 0.9972583055496216,
"reward_std": 0.01242972444742918,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.8460791110992432,
"rewards/format_reward_step": 0.99609375,
"step": 170
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8823307789862156,
"aux_distill/mean_u": 0.3094804324683254,
"aux_distill/n_active_tok": 151.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.47939958592132503,
"calib/avg_num_step_conf": 4.78125,
"calib/ece": 0.1715625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0006293995859213237,
"calib/mean_conf": 0.008125,
"calib/mu_c": 0.007608695652173915,
"calib/mu_w": 0.008238095238095239,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005962120008855911,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2806111111111111,
"calib/step_q_c_n": 180.0,
"calib/step_q_gap": -0.018162835249042164,
"calib/step_q_w": 0.29877394636015325,
"calib/step_q_w_n": 1044.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 572.0,
"completions/max_terminated_length": 572.0,
"completions/mean_length": 190.0859375,
"completions/mean_terminated_length": 190.83139038085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 61.0,
"epoch": 0.1824,
"grad_norm": 0.008502909913659096,
"learning_rate": 8.055555555555557e-07,
"loss": 0.0798,
"num_tokens": 35173465.0,
"reward": 1.0013163089752197,
"reward_std": 0.0020688537042587996,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.8229453563690186,
"rewards/format_reward_step": 1.0,
"step": 171
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8792869746685028,
"aux_distill/mean_u": 0.25173040070577396,
"aux_distill/n_active_tok": 145.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5367481980385206,
"calib/avg_num_step_conf": 4.546875,
"calib/ece": 0.143828125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008436724565756837,
"calib/mean_conf": 0.008515624999999999,
"calib/mu_c": 0.009230769230769232,
"calib/mu_w": 0.008387096774193548,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005316519619015339,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33072847682119205,
"calib/step_q_c_n": 151.0,
"calib/step_q_gap": -0.005166192477919496,
"calib/step_q_w": 0.33589466929911155,
"calib/step_q_w_n": 1013.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 525.0,
"completions/max_terminated_length": 525.0,
"completions/mean_length": 179.9140625,
"completions/mean_terminated_length": 180.61961364746094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 61.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.008968736976385117,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0966,
"num_tokens": 35346683.0,
"reward": 1.0013558864593506,
"reward_std": 0.0021096221171319485,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.8503679633140564,
"rewards/format_reward_step": 1.0,
"step": 172
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8806129209697247,
"aux_distill/mean_u": 0.27734205629014846,
"aux_distill/n_active_tok": 143.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5025084299695698,
"calib/avg_num_step_conf": 4.51171875,
"calib/ece": 0.2380859375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0001159634838391304,
"calib/mean_conf": 0.0080078125,
"calib/mu_c": 0.008095238095238095,
"calib/mu_w": 0.007979274611398964,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005688524761732496,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.32799242424242425,
"calib/step_q_c_n": 264.0,
"calib/step_q_gap": 0.045119248035914716,
"calib/step_q_w": 0.28287317620650954,
"calib/step_q_w_n": 891.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 571.0,
"completions/max_terminated_length": 571.0,
"completions/mean_length": 183.71875,
"completions/mean_terminated_length": 184.43922424316406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.009307953529059887,
"learning_rate": 7.5e-07,
"loss": 0.067,
"num_tokens": 35520683.0,
"reward": 1.001943826675415,
"reward_std": 0.0026724967174232006,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/final_brier_reward_step": 0.7577941417694092,
"rewards/format_reward_step": 1.0,
"step": 173
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8576763272285461,
"aux_distill/mean_u": 0.3163159836046228,
"aux_distill/n_active_tok": 157.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.46376619941814334,
"calib/avg_num_step_conf": 4.94921875,
"calib/ece": 0.2145703125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0009230362338005817,
"calib/mean_conf": 0.0080859375,
"calib/mu_c": 0.00736842105263158,
"calib/mu_w": 0.008291457286432161,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005576915791554841,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2866023166023166,
"calib/step_q_c_n": 259.0,
"calib/step_q_gap": 0.0005989435864436343,
"calib/step_q_w": 0.286003373015873,
"calib/step_q_w_n": 1008.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 560.0,
"completions/max_terminated_length": 560.0,
"completions/mean_length": 201.10546875,
"completions/mean_terminated_length": 201.89413452148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.1856,
"grad_norm": 0.008429630659520626,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0903,
"num_tokens": 35700206.0,
"reward": 1.0015923976898193,
"reward_std": 0.0024176673032343388,
"rewards/accuracy_reward_step": 0.22265625,
"rewards/final_brier_reward_step": 0.7805285453796387,
"rewards/format_reward_step": 1.0,
"step": 174
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8765918333083391,
"aux_distill/mean_u": 0.2949703924609209,
"aux_distill/n_active_tok": 153.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5479222231781813,
"calib/avg_num_step_conf": 4.8984375,
"calib/ece": 0.22276171875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.000997418910780352,
"calib/mean_conf": 0.00770703125,
"calib/mu_c": 0.008474576271186442,
"calib/mu_w": 0.00747715736040609,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005341109487880157,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.356,
"calib/step_q_c_n": 255.0,
"calib/step_q_gap": 0.0870982982982983,
"calib/step_q_w": 0.2689017017017017,
"calib/step_q_w_n": 999.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 723.0,
"completions/max_terminated_length": 723.0,
"completions/mean_length": 192.16015625,
"completions/mean_terminated_length": 192.9137420654297,
"completions/min_length": 0.0,
"completions/min_terminated_length": 47.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.009075365960597992,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0763,
"num_tokens": 35879031.0,
"reward": 1.0019090175628662,
"reward_std": 0.0027942899614572525,
"rewards/accuracy_reward_step": 0.23046875,
"rewards/final_brier_reward_step": 0.7733495831489563,
"rewards/format_reward_step": 1.0,
"step": 175
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9150636978447437,
"aux_distill/mean_u": 0.35805016530399253,
"aux_distill/n_active_tok": 153.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.47607655502392343,
"calib/avg_num_step_conf": 4.80078125,
"calib/ece": 0.17532421875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00048651124910923174,
"calib/mean_conf": 0.00826953125,
"calib/mu_c": 0.007872340425531916,
"calib/mu_w": 0.008358851674641147,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0055840181908078925,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.24904255319148935,
"calib/step_q_c_n": 188.0,
"calib/step_q_gap": -0.0616162364338709,
"calib/step_q_w": 0.31065878962536025,
"calib/step_q_w_n": 1041.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 188.4453125,
"completions/mean_terminated_length": 189.184326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.008459899574518204,
"learning_rate": 6.666666666666667e-07,
"loss": 0.094,
"num_tokens": 36055145.0,
"reward": 1.0013954639434814,
"reward_std": 0.002379113342612982,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.8191972970962524,
"rewards/format_reward_step": 1.0,
"step": 176
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8599807396531105,
"aux_distill/mean_u": 0.2972779997420456,
"aux_distill/n_active_tok": 154.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.48812477844735913,
"calib/avg_num_step_conf": 4.84375,
"calib/ece": 0.1441796875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 4.844617747843706e-05,
"calib/mean_conf": 0.0081640625,
"calib/mu_c": 0.008205128205128207,
"calib/mu_w": 0.00815668202764977,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005603120424914474,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3767272727272728,
"calib/step_q_c_n": 165.0,
"calib/step_q_gap": 0.09225285412262157,
"calib/step_q_w": 0.2844744186046512,
"calib/step_q_w_n": 1075.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 554.0,
"completions/max_terminated_length": 554.0,
"completions/mean_length": 193.140625,
"completions/mean_terminated_length": 193.89805603027344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.1888,
"grad_norm": 0.007624008227139711,
"learning_rate": 6.388888888888889e-07,
"loss": 0.1008,
"num_tokens": 36232229.0,
"reward": 1.0012009143829346,
"reward_std": 0.0019222039263695478,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.8500581979751587,
"rewards/format_reward_step": 1.0,
"step": 177
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8995106462389231,
"aux_distill/mean_u": 0.31550128866836435,
"aux_distill/n_active_tok": 151.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5246366536689118,
"calib/avg_num_step_conf": 4.8125,
"calib/ece": 0.1437890625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0004950963015479173,
"calib/mean_conf": 0.008554687500000002,
"calib/mu_c": 0.008974358974358977,
"calib/mu_w": 0.00847926267281106,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005140690301636906,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2966787096774194,
"calib/step_q_c_n": 155.0,
"calib/step_q_gap": 0.010712135861263405,
"calib/step_q_w": 0.285966573816156,
"calib/step_q_w_n": 1077.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 653.0,
"completions/max_terminated_length": 653.0,
"completions/mean_length": 189.65234375,
"completions/mean_terminated_length": 190.39608764648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 59.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.011668938212096691,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0498,
"num_tokens": 36410660.0,
"reward": 1.0013173818588257,
"reward_std": 0.002397837582975626,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.8502910137176514,
"rewards/format_reward_step": 1.0,
"step": 178
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8968758918344975,
"aux_distill/mean_u": 0.3325099115013923,
"aux_distill/n_active_tok": 158.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49503657262277956,
"calib/avg_num_step_conf": 5.0234375,
"calib/ece": 0.21822265625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0001401950539881569,
"calib/mean_conf": 0.00833984375,
"calib/mu_c": 0.008448275862068965,
"calib/mu_w": 0.008308080808080808,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.006065860406866115,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2133955223880597,
"calib/step_q_c_n": 268.0,
"calib/step_q_gap": -0.08181862299504439,
"calib/step_q_w": 0.2952141453831041,
"calib/step_q_w_n": 1018.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 602.0,
"completions/max_terminated_length": 602.0,
"completions/mean_length": 196.3046875,
"completions/mean_terminated_length": 197.07452392578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.009127902798354626,
"learning_rate": 5.833333333333334e-07,
"loss": 0.0987,
"num_tokens": 36590986.0,
"reward": 1.0018608570098877,
"reward_std": 0.0030939499847590923,
"rewards/accuracy_reward_step": 0.2265625,
"rewards/final_brier_reward_step": 0.7771593332290649,
"rewards/format_reward_step": 1.0,
"step": 179
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.870330423116684,
"aux_distill/mean_u": 0.334101875639815,
"aux_distill/n_active_tok": 157.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.48820831968555517,
"calib/avg_num_step_conf": 4.9140625,
"calib/ece": 0.159375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.00026640462932634584,
"calib/mean_conf": 0.00859375,
"calib/mu_c": 0.008372093023255815,
"calib/mu_w": 0.00863849765258216,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0057600964347396125,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.25797927461139897,
"calib/step_q_c_n": 193.0,
"calib/step_q_gap": -0.04420983336982165,
"calib/step_q_w": 0.3021891079812206,
"calib/step_q_w_n": 1065.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 576.0,
"completions/max_terminated_length": 576.0,
"completions/mean_length": 195.99609375,
"completions/mean_terminated_length": 196.7647247314453,
"completions/min_length": 0.0,
"completions/min_terminated_length": 47.0,
"epoch": 0.192,
"grad_norm": 0.009810417890548706,
"learning_rate": 5.555555555555555e-07,
"loss": 0.086,
"num_tokens": 36768825.0,
"reward": 1.0013527870178223,
"reward_std": 0.0027455103117972612,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.834736704826355,
"rewards/format_reward_step": 1.0,
"step": 180
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8936001621186733,
"aux_distill/mean_u": 0.2772019469641405,
"aux_distill/n_active_tok": 154.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5177832919768404,
"calib/avg_num_step_conf": 4.828125,
"calib/ece": 0.14393359375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00036311000827130013,
"calib/mean_conf": 0.00841015625,
"calib/mu_c": 0.00871794871794872,
"calib/mu_w": 0.008354838709677419,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005229847330523707,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2793037974683544,
"calib/step_q_c_n": 158.0,
"calib/step_q_gap": 0.001608064629764372,
"calib/step_q_w": 0.27769573283859,
"calib/step_q_w_n": 1078.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 656.0,
"completions/max_terminated_length": 656.0,
"completions/mean_length": 191.73046875,
"completions/mean_terminated_length": 192.48236083984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.009542922489345074,
"learning_rate": 5.277777777777779e-07,
"loss": 0.0903,
"num_tokens": 36947980.0,
"reward": 1.0012791156768799,
"reward_std": 0.0020589884370565414,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.850214421749115,
"rewards/format_reward_step": 1.0,
"step": 181
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8494912981987,
"aux_distill/mean_u": 0.2728873570876729,
"aux_distill/n_active_tok": 164.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5364191729323309,
"calib/avg_num_step_conf": 5.21875,
"calib/ece": 0.1016015625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0008959899749373442,
"calib/mean_conf": 0.007773437499999999,
"calib/mu_c": 0.008571428571428572,
"calib/mu_w": 0.007675438596491228,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005459674388971722,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3235074626865672,
"calib/step_q_c_n": 134.0,
"calib/step_q_gap": 0.01208658082300651,
"calib/step_q_w": 0.3114208818635607,
"calib/step_q_w_n": 1202.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1003.0,
"completions/max_terminated_length": 1003.0,
"completions/mean_length": 206.20703125,
"completions/mean_terminated_length": 207.0157012939453,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.008077757433056831,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0808,
"num_tokens": 37130737.0,
"reward": 1.0008924007415771,
"reward_std": 0.0018270486034452915,
"rewards/accuracy_reward_step": 0.109375,
"rewards/final_brier_reward_step": 0.8924098014831543,
"rewards/format_reward_step": 1.0,
"step": 182
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8732987251132727,
"aux_distill/mean_u": 0.3000003229976155,
"aux_distill/n_active_tok": 148.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5549640145913438,
"calib/avg_num_step_conf": 4.66796875,
"calib/ece": 0.1834765625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010460416050478167,
"calib/mean_conf": 0.0079296875,
"calib/mu_c": 0.008775510204081634,
"calib/mu_w": 0.007729468599033817,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005078575701153203,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.35028169014084515,
"calib/step_q_c_n": 213.0,
"calib/step_q_gap": 0.05462181234043778,
"calib/step_q_w": 0.29565987780040737,
"calib/step_q_w_n": 982.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 475.0,
"completions/max_terminated_length": 475.0,
"completions/mean_length": 190.69921875,
"completions/mean_terminated_length": 191.4470672607422,
"completions/min_length": 0.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.1952,
"grad_norm": 0.009582036174833775,
"learning_rate": 4.7222222222222226e-07,
"loss": 0.0906,
"num_tokens": 37310044.0,
"reward": 1.0016353130340576,
"reward_std": 0.0023358927574008703,
"rewards/accuracy_reward_step": 0.19140625,
"rewards/final_brier_reward_step": 0.8118644952774048,
"rewards/format_reward_step": 1.0,
"step": 183
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9122953005135059,
"aux_distill/mean_u": 0.32397631622647044,
"aux_distill/n_active_tok": 163.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5520361990950227,
"calib/avg_num_step_conf": 5.109375,
"calib/ece": 0.194765625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010935143288084495,
"calib/mean_conf": 0.008359375,
"calib/mu_c": 0.009230769230769233,
"calib/mu_w": 0.008137254901960784,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005344001273332091,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.27924603174603174,
"calib/step_q_c_n": 252.0,
"calib/step_q_gap": -0.0157066197691198,
"calib/step_q_w": 0.29495265151515154,
"calib/step_q_w_n": 1056.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 481.0,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 206.30859375,
"completions/mean_terminated_length": 207.11766052246094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 57.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.01037457212805748,
"learning_rate": 4.444444444444445e-07,
"loss": 0.0966,
"num_tokens": 37491947.0,
"reward": 1.0018258094787598,
"reward_std": 0.002241644309833646,
"rewards/accuracy_reward_step": 0.203125,
"rewards/final_brier_reward_step": 0.8005265593528748,
"rewards/format_reward_step": 1.0,
"step": 184
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8742652013897896,
"aux_distill/mean_u": 0.31161982672906746,
"aux_distill/n_active_tok": 159.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4833984375,
"calib/avg_num_step_conf": 5.02734375,
"calib/ece": 0.241640625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0003125000000000003,
"calib/mean_conf": 0.008359374999999999,
"calib/mu_c": 0.008125,
"calib/mu_w": 0.0084375,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005344001273332091,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3025913978494624,
"calib/step_q_c_n": 279.0,
"calib/step_q_gap": -0.0333262608806964,
"calib/step_q_w": 0.3359176587301588,
"calib/step_q_w_n": 1008.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 613.0,
"completions/max_terminated_length": 613.0,
"completions/mean_length": 200.53515625,
"completions/mean_terminated_length": 201.3215789794922,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.008647466078400612,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0808,
"num_tokens": 37674012.0,
"reward": 1.0019819736480713,
"reward_std": 0.002680209930986166,
"rewards/accuracy_reward_step": 0.25,
"rewards/final_brier_reward_step": 0.7539640665054321,
"rewards/format_reward_step": 1.0,
"step": 185
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9008009806275368,
"aux_distill/mean_u": 0.32494249858645763,
"aux_distill/n_active_tok": 152.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4924249465563714,
"calib/avg_num_step_conf": 4.8046875,
"calib/ece": 0.197578125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -2.4165814666788493e-05,
"calib/mean_conf": 0.009453125,
"calib/mu_c": 0.009433962264150945,
"calib/mu_w": 0.009458128078817733,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0052004497627008185,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.39382064777327935,
"calib/step_q_c_n": 247.0,
"calib/step_q_gap": 0.08945645652200773,
"calib/step_q_w": 0.3043641912512716,
"calib/step_q_w_n": 983.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 583.0,
"completions/max_terminated_length": 583.0,
"completions/mean_length": 197.9296875,
"completions/mean_terminated_length": 198.7058868408203,
"completions/min_length": 0.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.1984,
"grad_norm": 0.00890508946031332,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0811,
"num_tokens": 37853530.0,
"reward": 1.0018949508666992,
"reward_std": 0.0025664858985692263,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/final_brier_reward_step": 0.7967585325241089,
"rewards/format_reward_step": 1.0,
"step": 186
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8909507598727942,
"aux_distill/mean_u": 0.2999026721666665,
"aux_distill/n_active_tok": 150.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5296495956873315,
"calib/avg_num_step_conf": 4.72265625,
"calib/ece": 0.1974609375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.000779812250209129,
"calib/mean_conf": 0.0095703125,
"calib/mu_c": 0.010188679245283019,
"calib/mu_w": 0.00940886699507389,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.005097768497327409,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3390128755364807,
"calib/step_q_c_n": 233.0,
"calib/step_q_gap": 0.033364309962710226,
"calib/step_q_w": 0.30564856557377046,
"calib/step_q_w_n": 976.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 702.0,
"completions/max_terminated_length": 702.0,
"completions/mean_length": 188.59375,
"completions/mean_terminated_length": 189.33334350585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 59.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.009559610858559608,
"learning_rate": 3.611111111111111e-07,
"loss": 0.0774,
"num_tokens": 38027162.0,
"reward": 0.9981445074081421,
"reward_std": 0.013470064848661423,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/final_brier_reward_step": 0.7931640148162842,
"rewards/format_reward_step": 0.99609375,
"step": 187
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.930905407294631,
"aux_distill/mean_u": 0.3330317749775051,
"aux_distill/n_active_tok": 147.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5486639676113361,
"calib/avg_num_step_conf": 4.62109375,
"calib/ece": 0.24642745098039215,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0010149797570850206,
"calib/mean_conf": 0.00847450980392157,
"calib/mu_c": 0.009230769230769232,
"calib/mu_w": 0.008215789473684211,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004622020742067258,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2894666666666667,
"calib/step_q_c_n": 270.0,
"calib/step_q_gap": -0.016119751734209542,
"calib/step_q_w": 0.30558641840087625,
"calib/step_q_w_n": 913.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 479.0,
"completions/max_terminated_length": 479.0,
"completions/mean_length": 188.40625,
"completions/mean_terminated_length": 189.14511108398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.009920141659677029,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.1053,
"num_tokens": 38203274.0,
"reward": 1.000344157218933,
"reward_std": 0.008894114755094051,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/final_brier_reward_step": 0.7467821836471558,
"rewards/format_reward_step": 0.99609375,
"step": 188
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8790177255868912,
"aux_distill/mean_u": 0.318059983412503,
"aux_distill/n_active_tok": 168.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.47647527910685805,
"calib/avg_num_step_conf": 5.28515625,
"calib/ece": 0.249671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0005569377990430604,
"calib/mean_conf": 0.008140625,
"calib/mu_c": 0.007727272727272728,
"calib/mu_w": 0.008284210526315788,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005203265763861674,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3135640138408305,
"calib/step_q_c_n": 289.0,
"calib/step_q_gap": 0.0037642018107553743,
"calib/step_q_w": 0.3097998120300751,
"calib/step_q_w_n": 1064.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 496.0,
"completions/max_terminated_length": 496.0,
"completions/mean_length": 203.83984375,
"completions/mean_terminated_length": 204.63922119140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.2016,
"grad_norm": 0.009484932757914066,
"learning_rate": 3.055555555555556e-07,
"loss": 0.0791,
"num_tokens": 38387033.0,
"reward": 1.0019454956054688,
"reward_std": 0.0023256679996848106,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/final_brier_reward_step": 0.7460785508155823,
"rewards/format_reward_step": 1.0,
"step": 189
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.842708757147193,
"aux_distill/mean_u": 0.3038010499938975,
"aux_distill/n_active_tok": 172.25,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5113366571699904,
"calib/avg_num_step_conf": 5.390625,
"calib/ece": 0.14368627450980392,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00027421652421652683,
"calib/mean_conf": 0.009254901960784314,
"calib/mu_c": 0.00948717948717949,
"calib/mu_w": 0.009212962962962963,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004584043669396849,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3494915254237288,
"calib/step_q_c_n": 177.0,
"calib/step_q_gap": 0.018417128083745404,
"calib/step_q_w": 0.3310743973399834,
"calib/step_q_w_n": 1203.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 203.2265625,
"completions/mean_terminated_length": 204.02354431152344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.008105236105620861,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0789,
"num_tokens": 38568475.0,
"reward": 0.9994391202926636,
"reward_std": 0.007705869153141975,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.8465343713760376,
"rewards/format_reward_step": 0.99609375,
"step": 190
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8719021435827017,
"aux_distill/mean_u": 0.26051567777230705,
"aux_distill/n_active_tok": 152.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5524636653668912,
"calib/avg_num_step_conf": 4.7890625,
"calib/ece": 0.14328125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011059907834101417,
"calib/mean_conf": 0.0090625,
"calib/mu_c": 0.010000000000000004,
"calib/mu_w": 0.008894009216589862,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004227421643271464,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.36428571428571427,
"calib/step_q_c_n": 154.0,
"calib/step_q_gap": 0.0468883262260128,
"calib/step_q_w": 0.31739738805970147,
"calib/step_q_w_n": 1072.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 558.0,
"completions/max_terminated_length": 558.0,
"completions/mean_length": 182.8671875,
"completions/mean_terminated_length": 183.58432006835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.010515516623854637,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0907,
"num_tokens": 38743265.0,
"reward": 1.0014734268188477,
"reward_std": 0.0026649045757949352,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.8506031036376953,
"rewards/format_reward_step": 1.0,
"step": 191
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.872589722275734,
"aux_distill/mean_u": 0.282911263266772,
"aux_distill/n_active_tok": 159.625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.47913813934756344,
"calib/avg_num_step_conf": 5.0859375,
"calib/ece": 0.24546875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0003801852597664114,
"calib/mean_conf": 0.0084375,
"calib/mu_c": 0.008153846153846154,
"calib/mu_w": 0.008534031413612565,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.0050678983563208916,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.29670033670033674,
"calib/step_q_c_n": 297.0,
"calib/step_q_gap": 0.0076754610784461885,
"calib/step_q_w": 0.28902487562189055,
"calib/step_q_w_n": 1005.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 585.0,
"completions/max_terminated_length": 585.0,
"completions/mean_length": 197.97265625,
"completions/mean_terminated_length": 198.74903869628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 39.0,
"epoch": 0.2048,
"grad_norm": 0.011404029093682766,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0721,
"num_tokens": 38922730.0,
"reward": 1.0000687837600708,
"reward_std": 0.0084780128672719,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/final_brier_reward_step": 0.7501375079154968,
"rewards/format_reward_step": 0.99609375,
"step": 192
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.896713113412261,
"aux_distill/mean_u": 0.3166824893453891,
"aux_distill/n_active_tok": 165.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.48357371794871795,
"calib/avg_num_step_conf": 5.2734375,
"calib/ece": 0.178671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -9.61538461538465e-05,
"calib/mean_conf": 0.008828125,
"calib/mu_c": 0.008749999999999999,
"calib/mu_w": 0.008846153846153846,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0046972554736116915,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.29605691056910577,
"calib/step_q_c_n": 246.0,
"calib/step_q_gap": 0.010277019264757936,
"calib/step_q_w": 0.28577989130434783,
"calib/step_q_w_n": 1104.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 585.0,
"completions/max_terminated_length": 585.0,
"completions/mean_length": 204.1875,
"completions/mean_terminated_length": 204.98825073242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.00794635247439146,
"learning_rate": 1.9444444444444447e-07,
"loss": 0.1028,
"num_tokens": 39104522.0,
"reward": 1.001590609550476,
"reward_std": 0.0025997646152973175,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.8156812191009521,
"rewards/format_reward_step": 1.0,
"step": 193
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9085062127560377,
"aux_distill/mean_u": 0.2996982017706232,
"aux_distill/n_active_tok": 144.375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5206418184857095,
"calib/avg_num_step_conf": 4.51171875,
"calib/ece": 0.2343764705882353,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0005717867290656875,
"calib/mean_conf": 0.00876078431372549,
"calib/mu_c": 0.009193548387096776,
"calib/mu_w": 0.008621761658031088,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.0054680561994678014,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.2779622641509434,
"calib/step_q_c_n": 265.0,
"calib/step_q_gap": -0.011389421242315068,
"calib/step_q_w": 0.28935168539325845,
"calib/step_q_w_n": 890.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2557.0,
"completions/max_terminated_length": 2557.0,
"completions/mean_length": 189.11328125,
"completions/mean_terminated_length": 189.11328125,
"completions/min_length": 45.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.010015045292675495,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.1306,
"num_tokens": 39282687.0,
"reward": 0.9982671737670898,
"reward_std": 0.014264973811805248,
"rewards/accuracy_reward_step": 0.2421875,
"rewards/final_brier_reward_step": 0.7582530975341797,
"rewards/format_reward_step": 0.99609375,
"step": 194
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8857430908828974,
"aux_distill/mean_u": 0.3386770414670152,
"aux_distill/n_active_tok": 155.375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4878806089743589,
"calib/avg_num_step_conf": 4.87890625,
"calib/ece": 0.17875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 1.734723475976807e-18,
"calib/mean_conf": 0.008749999999999999,
"calib/mu_c": 0.00875,
"calib/mu_w": 0.008749999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005077524002897475,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.34873777777777776,
"calib/step_q_c_n": 225.0,
"calib/step_q_gap": 0.04072019965277779,
"calib/step_q_w": 0.30801757812499997,
"calib/step_q_w_n": 1024.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 464.0,
"completions/max_terminated_length": 464.0,
"completions/mean_length": 191.8359375,
"completions/mean_terminated_length": 192.58824157714844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.208,
"grad_norm": 0.012000245973467827,
"learning_rate": 1.3888888888888888e-07,
"loss": 0.0915,
"num_tokens": 39461589.0,
"reward": 1.0015894174575806,
"reward_std": 0.0027912305667996407,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.8156788945198059,
"rewards/format_reward_step": 1.0,
"step": 195
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9231633394956589,
"aux_distill/mean_u": 0.3158360285345036,
"aux_distill/n_active_tok": 145.5,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5147087378640778,
"calib/avg_num_step_conf": 4.56640625,
"calib/ece": 0.1861328125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0002737864077669936,
"calib/mean_conf": 0.0091796875,
"calib/mu_c": 0.009400000000000002,
"calib/mu_w": 0.009126213592233009,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.004297407637441874,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.364247311827957,
"calib/step_q_c_n": 186.0,
"calib/step_q_gap": 0.05065931589713302,
"calib/step_q_w": 0.313587995930824,
"calib/step_q_w_n": 983.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 506.0,
"completions/max_terminated_length": 506.0,
"completions/mean_length": 180.30859375,
"completions/mean_terminated_length": 181.0157012939453,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.009502574801445007,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.1179,
"num_tokens": 39634100.0,
"reward": 0.9978784918785095,
"reward_std": 0.01335802674293518,
"rewards/accuracy_reward_step": 0.1953125,
"rewards/final_brier_reward_step": 0.804350733757019,
"rewards/format_reward_step": 0.99609375,
"step": 196
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.9447934832423925,
"aux_distill/mean_u": 0.3606861734221428,
"aux_distill/n_active_tok": 166.125,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5377046325322188,
"calib/avg_num_step_conf": 5.23828125,
"calib/ece": 0.2174609375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.000715778474399166,
"calib/mean_conf": 0.0091015625,
"calib/mu_c": 0.009655172413793104,
"calib/mu_w": 0.008939393939393938,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00503633647194007,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.28548576779026225,
"calib/step_q_c_n": 267.0,
"calib/step_q_gap": -0.04320138304772658,
"calib/step_q_w": 0.32868715083798883,
"calib/step_q_w_n": 1074.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 534.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 193.2421875,
"completions/mean_terminated_length": 194.00001525878906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.012657982297241688,
"learning_rate": 8.333333333333334e-08,
"loss": 0.1053,
"num_tokens": 39812434.0,
"reward": 1.0021333694458008,
"reward_std": 0.002967329230159521,
"rewards/accuracy_reward_step": 0.2265625,
"rewards/final_brier_reward_step": 0.7777043581008911,
"rewards/format_reward_step": 1.0,
"step": 197
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8860240560024977,
"aux_distill/mean_u": 0.3080030762756176,
"aux_distill/n_active_tok": 153.75,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5506582411795682,
"calib/avg_num_step_conf": 4.84765625,
"calib/ece": 0.166953125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011521853607161688,
"calib/mean_conf": 0.008828125,
"calib/mu_c": 0.00977777777777778,
"calib/mu_w": 0.00862559241706161,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.004940441173050742,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3725735,
"calib/step_q_c_n": 200.0,
"calib/step_q_gap": 0.05898080067243039,
"calib/step_q_w": 0.3135926993275696,
"calib/step_q_w_n": 1041.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 477.0,
"completions/max_terminated_length": 477.0,
"completions/mean_length": 184.109375,
"completions/mean_terminated_length": 184.83139038085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.2112,
"grad_norm": 0.01102263294160366,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0579,
"num_tokens": 39988758.0,
"reward": 1.0016674995422363,
"reward_std": 0.002053692238405347,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.8275538682937622,
"rewards/format_reward_step": 1.0,
"step": 198
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.8891988657414913,
"aux_distill/mean_u": 0.32847913300924675,
"aux_distill/n_active_tok": 159.875,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5358375234151632,
"calib/avg_num_step_conf": 5.01171875,
"calib/ece": 0.1828515625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.000777876367938482,
"calib/mean_conf": 0.0085546875,
"calib/mu_c": 0.009183673469387756,
"calib/mu_w": 0.008405797101449274,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.00564760540205703,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.263408071748879,
"calib/step_q_c_n": 223.0,
"calib/step_q_gap": -0.06313928674168712,
"calib/step_q_w": 0.3265473584905661,
"calib/step_q_w_n": 1060.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 487.0,
"completions/max_terminated_length": 487.0,
"completions/mean_length": 189.8828125,
"completions/mean_terminated_length": 190.62745666503906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.010852034203708172,
"learning_rate": 2.777777777777778e-08,
"loss": 0.1011,
"num_tokens": 40165376.0,
"reward": 1.001705288887024,
"reward_std": 0.002582351677119732,
"rewards/accuracy_reward_step": 0.19140625,
"rewards/final_brier_reward_step": 0.8120043277740479,
"rewards/format_reward_step": 1.0,
"step": 199
},
{
"aux_distill/lambda": 0.10000000000000005,
"aux_distill/loss": 0.866759080439806,
"aux_distill/mean_u": 0.31014469211236895,
"aux_distill/n_active_tok": 161.0,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5681837735524391,
"calib/avg_num_step_conf": 5.05859375,
"calib/ece": 0.2224609375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0014875677535920182,
"calib/mean_conf": 0.0080078125,
"calib/mu_c": 0.009152542372881357,
"calib/mu_w": 0.007664974619289339,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.005032699470944371,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3124904214559387,
"calib/step_q_c_n": 261.0,
"calib/step_q_gap": 0.0130416787093236,
"calib/step_q_w": 0.2994487427466151,
"calib/step_q_w_n": 1034.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 567.0,
"completions/max_terminated_length": 567.0,
"completions/mean_length": 201.74609375,
"completions/mean_terminated_length": 202.53726196289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.008249734528362751,
"learning_rate": 0.0,
"loss": 0.1024,
"num_tokens": 40348879.0,
"reward": 1.0020647048950195,
"reward_std": 0.0021201735362410545,
"rewards/accuracy_reward_step": 0.23046875,
"rewards/final_brier_reward_step": 0.7736605405807495,
"rewards/format_reward_step": 1.0,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.10086522626224906,
"train_runtime": 13306.6332,
"train_samples_per_second": 3.848,
"train_steps_per_second": 0.015
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 40348879,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}