{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.1, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.32379189133644104, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006298445165157318, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.1, "aux_brier/loss": 7.577576924907484e-08, "aux_brier/mean_group_std": 0.04357231654427651, "aux_brier/mean_r": 0.4394718939076222, "aux_brier/n_active_tok": 24.923076923076923, "aux_brier/n_groups": 4.846153846153846, "aux_brier/n_step_records": 6.230769230769231, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.5918367346938775, "calib/avg_num_step_conf": 0.31640625, "calib/ece": 0.46071428571428596, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.004285714285714226, "calib/mean_conf": 0.960714285714286, "calib/mu_c": 0.9628571428571427, "calib/mu_w": 0.9585714285714285, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.46071428571428596, "calib/std_conf": 0.02548508967844466, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 677.55078125, "completions/mean_terminated_length": 754.1434326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.005849530920386314, "learning_rate": 7.5e-07, "loss": 0.0361, "num_tokens": 812176.0, "reward": 0.06003017723560333, "reward_std": 0.13926836848258972, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.029183203354477882, "rewards/format_reward_step": 0.05078125, "step": 3 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": 1.5729003877140713e-08, "aux_brier/mean_group_std": 0.03736511171309455, "aux_brier/mean_r": 0.515641851666916, "aux_brier/n_active_tok": 21.333333333333332, "aux_brier/n_groups": 4.833333333333333, "aux_brier/n_step_records": 5.333333333333333, "calib/answer_extract_rate": 0.0390625, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.125, "calib/ece": 0.77, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.012499999999999956, "calib/mean_conf": 0.97, "calib/mu_c": 0.98, "calib/mu_w": 0.9675, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.77, "calib/std_conf": 0.021908902300206666, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 732.6953125, "completions/mean_terminated_length": 784.8117065429688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.0019624708220362663, "learning_rate": 1.0000000000000002e-06, "loss": 0.0206, "num_tokens": 1105914.0, "reward": 0.012828808277845383, "reward_std": 0.032432470470666885, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.004440234508365393, "rewards/format_reward_step": 0.015625, "step": 4 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": 0.0, "aux_brier/mean_group_std": 0.0, "aux_brier/mean_r": 0.4117961536400311, "aux_brier/n_active_tok": 29.142857142857142, "aux_brier/n_groups": 7.285714285714286, "aux_brier/n_step_records": 7.285714285714286, "calib/answer_extract_rate": 0.046875, "calib/auroc": 0.9166666666666666, "calib/avg_num_step_conf": 0.203125, "calib/ece": 0.8299999999999998, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.03166666666666673, "calib/mean_conf": 0.9728571428571428, "calib/mu_c": 1.0, "calib/mu_w": 0.9683333333333333, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.8299999999999998, "calib/std_conf": 0.02657296462534038, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 696.09765625, "completions/mean_terminated_length": 778.1702880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.005333333333333333, "grad_norm": 0.002589157084003091, "learning_rate": 1.25e-06, "loss": 0.0127, "num_tokens": 1390803.0, "reward": 0.018915917724370956, "reward_std": 0.045617617666721344, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0053511718288064, "rewards/format_reward_step": 0.02734375, "step": 5 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 1.4815696324098504e-08, "aux_brier/mean_group_std": 0.05746869219517105, "aux_brier/mean_r": 0.3723841895175155, "aux_brier/n_active_tok": 36.888888888888886, "aux_brier/n_groups": 6.444444444444445, "aux_brier/n_step_records": 9.222222222222221, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.2543859649122807, "calib/avg_num_step_conf": 0.6484375, "calib/ece": 0.7795454545454545, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.08203125, "calib/frac_conf_gt_0.9": 0.9090909090909091, "calib/gap": 0.03175438596491231, "calib/mean_conf": 0.915909090909091, "calib/mu_c": 0.9433333333333334, "calib/mu_w": 0.911578947368421, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.11328125, "calib/pce": 0.7795454545454545, "calib/std_conf": 0.2017797875026835, "calib/step_conf_rate": 0.11328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 602.48828125, "completions/mean_terminated_length": 670.5956420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.5637331008911133, "learning_rate": 1.5e-06, "loss": 0.0227, "num_tokens": 1650992.0, "reward": 0.0578799769282341, "reward_std": 0.1234724223613739, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.020582422614097595, "rewards/format_reward_step": 0.08203125, "step": 6 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": -3.312424807646989e-08, "aux_brier/mean_group_std": 0.0524642909145865, "aux_brier/mean_r": 0.4580663278051075, "aux_brier/n_active_tok": 30.11764705882353, "aux_brier/n_groups": 5.882352941176471, "aux_brier/n_step_records": 7.529411764705882, "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.6214285714285714, "calib/avg_num_step_conf": 0.51171875, "calib/ece": 0.6669999999999999, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.8421052631578947, "calib/gap": 0.050071428571428656, "calib/mean_conf": 0.9091052631578947, "calib/mu_c": 0.9460000000000001, "calib/mu_w": 0.8959285714285714, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.6564736842105263, "calib/std_conf": 0.21569883847547158, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 2992.0, "completions/max_terminated_length": 2992.0, "completions/mean_length": 647.859375, "completions/mean_terminated_length": 764.294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.06025625765323639, "learning_rate": 1.75e-06, "loss": 0.0251, "num_tokens": 1924268.0, "reward": 0.05460413172841072, "reward_std": 0.10174418985843658, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.015291526913642883, "rewards/format_reward_step": 0.0625, "step": 7 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": -1.477801154175557e-09, "aux_brier/mean_group_std": 0.013452690359781548, "aux_brier/mean_r": 0.5437225358088926, "aux_brier/n_active_tok": 18.181818181818183, "aux_brier/n_groups": 4.090909090909091, "aux_brier/n_step_records": 4.545454545454546, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.31818181818181823, "calib/avg_num_step_conf": 0.19921875, "calib/ece": 0.7907692307692309, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.8461538461538461, "calib/gap": -0.09954545454545449, "calib/mean_conf": 0.8892307692307693, "calib/mu_c": 0.8049999999999999, "calib/mu_w": 0.9045454545454544, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.7630769230769231, "calib/std_conf": 0.207678062189517, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 659.81640625, "completions/mean_terminated_length": 715.7330322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.003042832249775529, "learning_rate": 2.0000000000000003e-06, "loss": 0.0296, "num_tokens": 2199693.0, "reward": 0.02483486384153366, "reward_std": 0.06142483651638031, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.005589453037828207, "rewards/format_reward_step": 0.03125, "step": 8 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": -2.289575166495676e-08, "aux_brier/mean_group_std": 0.015908686119228563, "aux_brier/mean_r": 0.4945823469752942, "aux_brier/n_active_tok": 22.133333333333333, "aux_brier/n_groups": 5.2, "aux_brier/n_step_records": 5.533333333333333, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.375, "calib/avg_num_step_conf": 0.34375, "calib/ece": 0.8099999999999999, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.19199999999999995, "calib/mean_conf": 0.89, "calib/mu_c": 0.73, "calib/mu_w": 0.9219999999999999, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.7666666666666666, "calib/std_conf": 0.1451436070471816, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 708.26171875, "completions/mean_terminated_length": 755.4791870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.003322332864627242, "learning_rate": 2.25e-06, "loss": 0.0206, "num_tokens": 2488544.0, "reward": 0.03203320503234863, "reward_std": 0.06588542461395264, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010945312678813934, "rewards/format_reward_step": 0.04296875, "step": 9 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": 0.0, "aux_brier/mean_group_std": 0.0, "aux_brier/mean_r": 0.5014135361346449, "aux_brier/n_active_tok": 18.0, "aux_brier/n_groups": 4.5, "aux_brier/n_step_records": 4.5, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.9166666666666666, "calib/avg_num_step_conf": 0.2734375, "calib/ece": 0.7075, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.023333333333333206, "calib/mean_conf": 0.9575, "calib/mu_c": 0.975, "calib/mu_w": 0.9516666666666668, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.7075, "calib/std_conf": 0.013919410907075068, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 626.03125, "completions/mean_terminated_length": 702.9122924804688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0037440224550664425, "learning_rate": 2.5e-06, "loss": 0.0161, "num_tokens": 2755608.0, "reward": 0.025769628584384918, "reward_std": 0.06890425831079483, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.009328515268862247, "rewards/format_reward_step": 0.0234375, "step": 10 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 1.7376004437766014e-08, "aux_brier/mean_group_std": 0.04473917750408892, "aux_brier/mean_r": 0.4066446659813901, "aux_brier/n_active_tok": 25.11111111111111, "aux_brier/n_groups": 4.944444444444445, "aux_brier/n_step_records": 6.277777777777778, "calib/answer_extract_rate": 0.1328125, "calib/auroc": 0.4761904761904762, "calib/avg_num_step_conf": 0.48046875, "calib/ece": 0.8730434782608694, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.9130434782608695, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.96, "calib/mu_c": 0.96, "calib/mu_w": 0.9599999999999999, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.8730434782608694, "calib/std_conf": 0.026702873970059274, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2923.0, "completions/max_terminated_length": 2923.0, "completions/mean_length": 669.08203125, "completions/mean_terminated_length": 728.872314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.22778251767158508, "learning_rate": 2.7500000000000004e-06, "loss": 0.009, "num_tokens": 3031373.0, "reward": 0.04616621136665344, "reward_std": 0.09391022473573685, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.012789842672646046, "rewards/format_reward_step": 0.0703125, "step": 11 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": 1.049562002620688e-06, "aux_brier/mean_group_std": 0.044216260561479336, "aux_brier/mean_r": 0.3861607698583299, "aux_brier/n_active_tok": 29.904761904761905, "aux_brier/n_groups": 5.0, "aux_brier/n_step_records": 7.476190476190476, "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.6111111111111112, "calib/avg_num_step_conf": 0.64453125, "calib/ece": 0.5557199999999999, "calib/final_conf_rate": 0.1171875, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.7666666666666667, "calib/gap": 0.1521460317460317, "calib/mean_conf": 0.8557199999999999, "calib/mu_c": 0.9622222222222222, "calib/mu_w": 0.8100761904761905, "calib/nonempty_final_conf_rate": 0.1171875, "calib/nonempty_reasoning_rate": 0.20703125, "calib/nonempty_step_conf_rate": 0.1484375, "calib/pce": 0.5557199999999999, "calib/std_conf": 0.28735765798043383, "calib/step_conf_rate": 0.1484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 617.40625, "completions/mean_terminated_length": 678.3519287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.27130717039108276, "learning_rate": 3e-06, "loss": 0.0425, "num_tokens": 3293605.0, "reward": 0.0996939092874527, "reward_std": 0.1851814091205597, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.047213148325681686, "rewards/format_reward_step": 0.09765625, "step": 12 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -9.533306213680085e-08, "aux_brier/mean_group_std": 0.10324539318803636, "aux_brier/mean_r": 0.435593864980078, "aux_brier/n_active_tok": 40.15384615384615, "aux_brier/n_groups": 6.769230769230769, "aux_brier/n_step_records": 10.038461538461538, "calib/answer_extract_rate": 0.18359375, "calib/auroc": 0.3326530612244898, "calib/avg_num_step_conf": 1.05078125, "calib/ece": 0.7514642857142855, "calib/final_conf_rate": 0.1640625, "calib/format_rate": 0.13671875, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": 0.021099999999999897, "calib/mean_conf": 0.9181309523809522, "calib/mu_c": 0.9357142857142857, "calib/mu_w": 0.9146142857142858, "calib/nonempty_final_conf_rate": 0.1640625, "calib/nonempty_reasoning_rate": 0.2265625, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.7514642857142855, "calib/std_conf": 0.1771416926645033, "calib/step_conf_rate": 0.1953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2960.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 620.421875, "completions/mean_terminated_length": 661.7833862304688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.23905538022518158, "learning_rate": 3.2500000000000002e-06, "loss": 0.0071, "num_tokens": 3557025.0, "reward": 0.11006979644298553, "reward_std": 0.21814632415771484, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.041841670870780945, "rewards/format_reward_step": 0.13671875, "step": 13 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -1.0146125100973791e-07, "aux_brier/mean_group_std": 0.08463532005936164, "aux_brier/mean_r": 0.4396115964218729, "aux_brier/n_active_tok": 29.92, "aux_brier/n_groups": 5.04, "aux_brier/n_step_records": 7.48, "calib/answer_extract_rate": 0.1953125, "calib/auroc": 0.3866666666666667, "calib/avg_num_step_conf": 0.734375, "calib/ece": 0.5905405405405406, "calib/final_conf_rate": 0.14453125, "calib/format_rate": 0.125, "calib/frac_conf_gt_0.9": 0.7027027027027027, "calib/gap": -0.030100000000000016, "calib/mean_conf": 0.8878378378378379, "calib/mu_c": 0.8675, "calib/mu_w": 0.8976000000000001, "calib/nonempty_final_conf_rate": 0.14453125, "calib/nonempty_reasoning_rate": 0.234375, "calib/nonempty_step_conf_rate": 0.17578125, "calib/pce": 0.5770270270270271, "calib/std_conf": 0.17837305479361545, "calib/step_conf_rate": 0.17578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 616.9765625, "completions/mean_terminated_length": 669.2626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.14763100445270538, "learning_rate": 3.5e-06, "loss": 0.049, "num_tokens": 3820371.0, "reward": 0.12359999865293503, "reward_std": 0.19199584424495697, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.05690000206232071, "rewards/format_reward_step": 0.125, "step": 14 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 5.148582835993546e-08, "aux_brier/mean_group_std": 0.12489536346329189, "aux_brier/mean_r": 0.48580572546183526, "aux_brier/n_active_tok": 45.666666666666664, "aux_brier/n_groups": 6.625, "aux_brier/n_step_records": 11.416666666666666, "calib/answer_extract_rate": 0.2578125, "calib/auroc": 0.2687074829931973, "calib/avg_num_step_conf": 1.1015625, "calib/ece": 0.848070909090909, "calib/final_conf_rate": 0.21484375, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.8363636363636363, "calib/gap": -0.3083789115646257, "calib/mean_conf": 0.8880709090909091, "calib/mu_c": 0.6133333333333334, "calib/mu_w": 0.9217122448979591, "calib/nonempty_final_conf_rate": 0.21484375, "calib/nonempty_reasoning_rate": 0.296875, "calib/nonempty_step_conf_rate": 0.21484375, "calib/pce": 0.8135254545454544, "calib/std_conf": 0.21320841418900832, "calib/step_conf_rate": 0.21484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 562.0390625, "completions/mean_terminated_length": 609.6694946289062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.4689406752586365, "learning_rate": 3.7500000000000005e-06, "loss": 0.0805, "num_tokens": 4072133.0, "reward": 0.11165524274110794, "reward_std": 0.18125450611114502, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.032558489590883255, "rewards/format_reward_step": 0.16015625, "step": 15 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.720391010622128e-09, "aux_brier/mean_group_std": 0.08268950881241742, "aux_brier/mean_r": 0.5006170795156372, "aux_brier/n_active_tok": 47.333333333333336, "aux_brier/n_groups": 7.266666666666667, "aux_brier/n_step_records": 11.833333333333334, "calib/answer_extract_rate": 0.28515625, "calib/auroc": 0.49767080745341613, "calib/avg_num_step_conf": 1.41015625, "calib/ece": 0.6770844444444445, "calib/final_conf_rate": 0.234375, "calib/format_rate": 0.18359375, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.057219047619047836, "calib/mean_conf": 0.9104177777777778, "calib/mu_c": 0.9542857142857144, "calib/mu_w": 0.8970666666666666, "calib/nonempty_final_conf_rate": 0.234375, "calib/nonempty_reasoning_rate": 0.33984375, "calib/nonempty_step_conf_rate": 0.25, "calib/pce": 0.6770844444444445, "calib/std_conf": 0.18774205703636818, "calib/step_conf_rate": 0.25, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2924.0, "completions/max_terminated_length": 2924.0, "completions/mean_length": 636.3984375, "completions/mean_terminated_length": 670.4443969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.09589699655771255, "learning_rate": 4.000000000000001e-06, "loss": 0.0878, "num_tokens": 4343899.0, "reward": 0.16413387656211853, "reward_std": 0.30323994159698486, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.07059801369905472, "rewards/format_reward_step": 0.18359375, "step": 16 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.940613958035136e-09, "aux_brier/mean_group_std": 0.09984985591998839, "aux_brier/mean_r": 0.5037807829020474, "aux_brier/n_active_tok": 53.25, "aux_brier/n_groups": 6.8125, "aux_brier/n_step_records": 13.3125, "calib/answer_extract_rate": 0.3515625, "calib/auroc": 0.4314685314685315, "calib/avg_num_step_conf": 1.78125, "calib/ece": 0.7495588235294116, "calib/final_conf_rate": 0.265625, "calib/format_rate": 0.2421875, "calib/frac_conf_gt_0.9": 0.8235294117647058, "calib/gap": -0.008517482517482633, "calib/mean_conf": 0.940735294117647, "calib/mu_c": 0.9338461538461539, "calib/mu_w": 0.9423636363636365, "calib/nonempty_final_conf_rate": 0.265625, "calib/nonempty_reasoning_rate": 0.42578125, "calib/nonempty_step_conf_rate": 0.33203125, "calib/pce": 0.7495588235294116, "calib/std_conf": 0.07006963293901011, "calib/step_conf_rate": 0.33203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3056.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 506.90625, "completions/mean_terminated_length": 536.2313842773438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.39644762873649597, "learning_rate": 4.25e-06, "loss": 0.0278, "num_tokens": 4577195.0, "reward": 0.19333231449127197, "reward_std": 0.3084465265274048, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.07020430266857147, "rewards/format_reward_step": 0.2421875, "step": 17 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.114924291532743e-09, "aux_brier/mean_group_std": 0.1339071515098237, "aux_brier/mean_r": 0.4656635302259107, "aux_brier/n_active_tok": 46.42857142857143, "aux_brier/n_groups": 6.0, "aux_brier/n_step_records": 11.607142857142858, "calib/answer_extract_rate": 0.26171875, "calib/auroc": 0.5976430976430978, "calib/avg_num_step_conf": 1.30078125, "calib/ece": 0.7418, "calib/final_conf_rate": 0.25390625, "calib/format_rate": 0.21484375, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.055661616161616134, "calib/mean_conf": 0.9110307692307692, "calib/mu_c": 0.9572727272727272, "calib/mu_w": 0.901611111111111, "calib/nonempty_final_conf_rate": 0.25390625, "calib/nonempty_reasoning_rate": 0.3046875, "calib/nonempty_step_conf_rate": 0.265625, "calib/pce": 0.7418, "calib/std_conf": 0.16250443489624136, "calib/step_conf_rate": 0.265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2872.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 546.1953125, "completions/mean_terminated_length": 587.5042114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0192, "grad_norm": 0.01480529922991991, "learning_rate": 4.5e-06, "loss": 0.0867, "num_tokens": 4827741.0, "reward": 0.170943945646286, "reward_std": 0.2673485279083252, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.06658825278282166, "rewards/format_reward_step": 0.21484375, "step": 18 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.668735549886229e-09, "aux_brier/mean_group_std": 0.18498027240974022, "aux_brier/mean_r": 0.48280702387294094, "aux_brier/n_active_tok": 97.0, "aux_brier/n_groups": 7.78125, "aux_brier/n_step_records": 24.25, "calib/answer_extract_rate": 0.64453125, "calib/auroc": 0.4915229885057471, "calib/avg_num_step_conf": 3.078125, "calib/ece": 0.721006711409396, "calib/final_conf_rate": 0.58203125, "calib/format_rate": 0.48828125, "calib/frac_conf_gt_0.9": 0.7718120805369127, "calib/gap": 0.005899425287356319, "calib/mean_conf": 0.909731543624161, "calib/mu_c": 0.9144827586206896, "calib/mu_w": 0.9085833333333333, "calib/nonempty_final_conf_rate": 0.58203125, "calib/nonempty_reasoning_rate": 0.71875, "calib/nonempty_step_conf_rate": 0.59375, "calib/pce": 0.7180536912751678, "calib/std_conf": 0.15382023984576226, "calib/step_conf_rate": 0.59375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 414.8203125, "completions/mean_terminated_length": 424.7760314941406, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.020266666666666665, "grad_norm": 0.08393163979053497, "learning_rate": 4.75e-06, "loss": 0.0789, "num_tokens": 5038695.0, "reward": 0.40926384925842285, "reward_std": 0.44889628887176514, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.16049286723136902, "rewards/format_reward_step": 0.48828125, "step": 19 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.39126467185924e-09, "aux_brier/mean_group_std": 0.19250987151383014, "aux_brier/mean_r": 0.47375429637667654, "aux_brier/n_active_tok": 117.625, "aux_brier/n_groups": 8.8125, "aux_brier/n_step_records": 29.40625, "calib/answer_extract_rate": 0.72265625, "calib/auroc": 0.5795833333333333, "calib/avg_num_step_conf": 3.75, "calib/ece": 0.62632225433526, "calib/final_conf_rate": 0.67578125, "calib/format_rate": 0.59375, "calib/frac_conf_gt_0.9": 0.7283236994219653, "calib/gap": 0.02785833333333354, "calib/mean_conf": 0.900079479768786, "calib/mu_c": 0.9202083333333334, "calib/mu_w": 0.8923499999999999, "calib/nonempty_final_conf_rate": 0.67578125, "calib/nonempty_reasoning_rate": 0.80859375, "calib/nonempty_step_conf_rate": 0.73046875, "calib/pce": 0.624472543352601, "calib/std_conf": 0.16012256811207, "calib/step_conf_rate": 0.73046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 406.50390625, "completions/mean_terminated_length": 416.260009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.021333333333333333, "grad_norm": 0.23560120165348053, "learning_rate": 5e-06, "loss": 0.0914, "num_tokens": 5247632.0, "reward": 0.5486721992492676, "reward_std": 0.5214025378227234, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.24156367778778076, "rewards/format_reward_step": 0.59375, "step": 20 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1447771191007714e-08, "aux_brier/mean_group_std": 0.21348233132151326, "aux_brier/mean_r": 0.48291993027533586, "aux_brier/n_active_tok": 128.25, "aux_brier/n_groups": 8.03125, "aux_brier/n_step_records": 32.0625, "calib/answer_extract_rate": 0.7890625, "calib/auroc": 0.5100358422939069, "calib/avg_num_step_conf": 4.0703125, "calib/ece": 0.6886000000000001, "calib/final_conf_rate": 0.78125, "calib/format_rate": 0.703125, "calib/frac_conf_gt_0.9": 0.765, "calib/gap": 0.024602150537634482, "calib/mean_conf": 0.9136000000000002, "calib/mu_c": 0.9326666666666668, "calib/mu_w": 0.9080645161290323, "calib/nonempty_final_conf_rate": 0.78125, "calib/nonempty_reasoning_rate": 0.8828125, "calib/nonempty_step_conf_rate": 0.83203125, "calib/pce": 0.6886000000000001, "calib/std_conf": 0.1340822135855461, "calib/step_conf_rate": 0.83203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2947.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 321.03125, "completions/mean_terminated_length": 322.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0224, "grad_norm": 0.23807606101036072, "learning_rate": 4.9722222222222224e-06, "loss": 0.0649, "num_tokens": 5432776.0, "reward": 0.5986981391906738, "reward_std": 0.4536815881729126, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.2541675865650177, "rewards/format_reward_step": 0.703125, "step": 21 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.53926171284669e-08, "aux_brier/mean_group_std": 0.18000107465540574, "aux_brier/mean_r": 0.4770465280997206, "aux_brier/n_active_tok": 139.625, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 34.90625, "calib/answer_extract_rate": 0.8359375, "calib/auroc": 0.4400944568729803, "calib/avg_num_step_conf": 4.4375, "calib/ece": 0.6613344827586207, "calib/final_conf_rate": 0.79296875, "calib/format_rate": 0.74609375, "calib/frac_conf_gt_0.9": 0.7339901477832512, "calib/gap": -0.012347576435495888, "calib/mean_conf": 0.9112852216748769, "calib/mu_c": 0.9022222222222221, "calib/mu_w": 0.914569798657718, "calib/nonempty_final_conf_rate": 0.79296875, "calib/nonempty_reasoning_rate": 0.88671875, "calib/nonempty_step_conf_rate": 0.84375, "calib/pce": 0.6533049261083744, "calib/std_conf": 0.13118444308196095, "calib/step_conf_rate": 0.84375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 373.7890625, "completions/mean_terminated_length": 376.7322692871094, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.023466666666666667, "grad_norm": 0.3638046979904175, "learning_rate": 4.944444444444445e-06, "loss": 0.1508, "num_tokens": 5630282.0, "reward": 0.6571128368377686, "reward_std": 0.5000396966934204, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.27688878774642944, "rewards/format_reward_step": 0.74609375, "step": 22 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.056146938631212e-09, "aux_brier/mean_group_std": 0.20348816761380015, "aux_brier/mean_r": 0.45004448004259806, "aux_brier/n_active_tok": 135.625, "aux_brier/n_groups": 7.96875, "aux_brier/n_step_records": 33.90625, "calib/answer_extract_rate": 0.86328125, "calib/auroc": 0.45709075305728386, "calib/avg_num_step_conf": 4.30078125, "calib/ece": 0.6455714285714285, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.8046875, "calib/frac_conf_gt_0.9": 0.7926267281105991, "calib/gap": 0.007901845097618487, "calib/mean_conf": 0.9084838709677421, "calib/mu_c": 0.9142372881355934, "calib/mu_w": 0.9063354430379749, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.93359375, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.6410829493087556, "calib/std_conf": 0.1628202225003445, "calib/step_conf_rate": 0.91015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2228.0, "completions/max_terminated_length": 2228.0, "completions/mean_length": 322.25390625, "completions/mean_terminated_length": 326.0751037597656, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.024533333333333334, "grad_norm": 0.20985783636569977, "learning_rate": 4.9166666666666665e-06, "loss": 0.0767, "num_tokens": 5816715.0, "reward": 0.7147294282913208, "reward_std": 0.5085971355438232, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.2964176535606384, "rewards/format_reward_step": 0.8046875, "step": 23 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.2515715999982255e-08, "aux_brier/mean_group_std": 0.21807992550748412, "aux_brier/mean_r": 0.5138780667267331, "aux_brier/n_active_tok": 162.625, "aux_brier/n_groups": 9.5, "aux_brier/n_step_records": 40.65625, "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.5052807486631016, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.7266960352422906, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.7400881057268722, "calib/gap": 0.002164438502673671, "calib/mean_conf": 0.902466960352423, "calib/mu_c": 0.9042499999999999, "calib/mu_w": 0.9020855614973262, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.72647577092511, "calib/std_conf": 0.15633492937236343, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2395.0, "completions/max_terminated_length": 2395.0, "completions/mean_length": 305.625, "completions/mean_terminated_length": 305.625, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.0256, "grad_norm": 0.2822449207305908, "learning_rate": 4.888888888888889e-06, "loss": 0.0269, "num_tokens": 5999467.0, "reward": 0.6373910903930664, "reward_std": 0.43420350551605225, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.2526894509792328, "rewards/format_reward_step": 0.828125, "step": 24 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.5344279522276913e-09, "aux_brier/mean_group_std": 0.1798748501833308, "aux_brier/mean_r": 0.46854791868214885, "aux_brier/n_active_tok": 162.25, "aux_brier/n_groups": 9.84375, "aux_brier/n_step_records": 40.5625, "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.49373737373737375, "calib/avg_num_step_conf": 5.15625, "calib/ece": 0.6825038297872339, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.7702127659574468, "calib/gap": 0.01481696969696955, "calib/mean_conf": 0.9081344680851064, "calib/mu_c": 0.9194836363636363, "calib/mu_w": 0.9046666666666667, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.6782978723404254, "calib/std_conf": 0.16339619921630394, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2334.0, "completions/max_terminated_length": 2334.0, "completions/mean_length": 303.58203125, "completions/mean_terminated_length": 304.7725524902344, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.02666666666666667, "grad_norm": 0.11927485466003418, "learning_rate": 4.861111111111111e-06, "loss": 0.0495, "num_tokens": 6180408.0, "reward": 0.7238137125968933, "reward_std": 0.4830150604248047, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.3015049397945404, "rewards/format_reward_step": 0.859375, "step": 25 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.91186948490208e-09, "aux_brier/mean_group_std": 0.18813481670262427, "aux_brier/mean_r": 0.5172853977295571, "aux_brier/n_active_tok": 154.75, "aux_brier/n_groups": 7.96875, "aux_brier/n_step_records": 38.6875, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4997711670480549, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.7169796610169492, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.7838983050847458, "calib/gap": 0.03328933638443954, "calib/mean_conf": 0.9118949152542372, "calib/mu_c": 0.9386956521739132, "calib/mu_w": 0.9054063157894736, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.7169796610169492, "calib/std_conf": 0.14151453231206076, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1738.0, "completions/max_terminated_length": 1738.0, "completions/mean_length": 296.37109375, "completions/mean_terminated_length": 296.37109375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.027733333333333332, "grad_norm": 0.7803621888160706, "learning_rate": 4.833333333333333e-06, "loss": 0.06, "num_tokens": 6361519.0, "reward": 0.6934462785720825, "reward_std": 0.41746586561203003, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.2815975546836853, "rewards/format_reward_step": 0.86328125, "step": 26 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.690096985447088e-09, "aux_brier/mean_group_std": 0.20029898540091512, "aux_brier/mean_r": 0.5076341572702145, "aux_brier/n_active_tok": 167.375, "aux_brier/n_groups": 9.5, "aux_brier/n_step_records": 41.84375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.48783515392254223, "calib/avg_num_step_conf": 5.33984375, "calib/ece": 0.6745267489711934, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.6748971193415638, "calib/gap": 0.0008758689175770717, "calib/mean_conf": 0.8862962962962964, "calib/mu_c": 0.8869811320754716, "calib/mu_w": 0.8861052631578945, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.671358024691358, "calib/std_conf": 0.1623752026855946, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 284.8359375, "completions/mean_terminated_length": 285.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.0288, "grad_norm": 0.5266081690788269, "learning_rate": 4.805555555555556e-06, "loss": 0.0333, "num_tokens": 6539653.0, "reward": 0.7518002986907959, "reward_std": 0.4312363266944885, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.3275136947631836, "rewards/format_reward_step": 0.91015625, "step": 27 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1516466663928924e-08, "aux_brier/mean_group_std": 0.2100269071624303, "aux_brier/mean_r": 0.5077899568958248, "aux_brier/n_active_tok": 161.875, "aux_brier/n_groups": 9.03125, "aux_brier/n_step_records": 40.46875, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.46140583554376663, "calib/avg_num_step_conf": 5.078125, "calib/ece": 0.6487029288702929, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.7740585774058577, "calib/gap": -0.025137931034482808, "calib/mean_conf": 0.9123012552301256, "calib/mu_c": 0.894, "calib/mu_w": 0.9191379310344828, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.6445188284518829, "calib/std_conf": 0.14621063371947451, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 291.09375, "completions/mean_terminated_length": 292.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 37.0, "epoch": 0.029866666666666666, "grad_norm": 0.7113686203956604, "learning_rate": 4.777777777777778e-06, "loss": 0.0575, "num_tokens": 6721117.0, "reward": 0.7854001522064209, "reward_std": 0.48861730098724365, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.3291007876396179, "rewards/format_reward_step": 0.8984375, "step": 28 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.426145683444354e-09, "aux_brier/mean_group_std": 0.2137391558059363, "aux_brier/mean_r": 0.5381966670089346, "aux_brier/n_active_tok": 170.5, "aux_brier/n_groups": 9.09375, "aux_brier/n_step_records": 42.625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5993589743589743, "calib/avg_num_step_conf": 5.390625, "calib/ece": 0.7296296296296296, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.691358024691358, "calib/gap": 0.06490950226244352, "calib/mean_conf": 0.8901234567901235, "calib/mu_c": 0.9446153846153847, "calib/mu_w": 0.8797058823529412, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.7296296296296296, "calib/std_conf": 0.16349697391262502, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 300.42578125, "completions/mean_terminated_length": 302.7913513183594, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.030933333333333334, "grad_norm": 0.1321856528520584, "learning_rate": 4.75e-06, "loss": 0.0428, "num_tokens": 6905154.0, "reward": 0.6882942318916321, "reward_std": 0.3734467625617981, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.2922394275665283, "rewards/format_reward_step": 0.91015625, "step": 29 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.015215935934904e-09, "aux_brier/mean_group_std": 0.20785395803487267, "aux_brier/mean_r": 0.5942987839190768, "aux_brier/n_active_tok": 161.0, "aux_brier/n_groups": 8.25, "aux_brier/n_step_records": 40.25, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.42458303118201596, "calib/avg_num_step_conf": 5.05078125, "calib/ece": 0.673130081300813, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.6016260162601627, "calib/gap": -0.020483787423598798, "calib/mean_conf": 0.8710975609756098, "calib/mu_c": 0.8546938775510204, "calib/mu_w": 0.8751776649746192, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.672520325203252, "calib/std_conf": 0.17216464584404084, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2391.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 298.9609375, "completions/mean_terminated_length": 300.13336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.032, "grad_norm": 0.18173524737358093, "learning_rate": 4.722222222222222e-06, "loss": 0.0717, "num_tokens": 7088672.0, "reward": 0.7423487305641174, "reward_std": 0.39480555057525635, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.3287699222564697, "rewards/format_reward_step": 0.9375, "step": 30 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.266128449450292e-09, "aux_brier/mean_group_std": 0.2204184963840759, "aux_brier/mean_r": 0.5945776016599642, "aux_brier/n_active_tok": 170.5, "aux_brier/n_groups": 9.53125, "aux_brier/n_step_records": 42.625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5138953488372093, "calib/avg_num_step_conf": 5.375, "calib/ece": 0.7052674897119342, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.6378600823045267, "calib/gap": 0.015393023255813931, "calib/mean_conf": 0.8794238683127573, "calib/mu_c": 0.892093023255814, "calib/mu_w": 0.8767, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.7038683127572016, "calib/std_conf": 0.1732540101206787, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1714.0, "completions/max_terminated_length": 1714.0, "completions/mean_length": 305.76953125, "completions/mean_terminated_length": 305.76953125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.03306666666666667, "grad_norm": 0.7781769037246704, "learning_rate": 4.694444444444445e-06, "loss": 0.0798, "num_tokens": 7272861.0, "reward": 0.7064942121505737, "reward_std": 0.35947245359420776, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.3025394678115845, "rewards/format_reward_step": 0.91015625, "step": 31 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1055202975285283e-08, "aux_brier/mean_group_std": 0.20600129394839078, "aux_brier/mean_r": 0.6912579028682645, "aux_brier/n_active_tok": 165.875, "aux_brier/n_groups": 9.25, "aux_brier/n_step_records": 41.46875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.44870364010989017, "calib/avg_num_step_conf": 5.19921875, "calib/ece": 0.5632995934959351, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.483739837398374, "calib/gap": -0.01180020604395593, "calib/mean_conf": 0.8123239837398375, "calib/mu_c": 0.8035937500000001, "calib/mu_w": 0.815393956043956, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.5577304878048781, "calib/std_conf": 0.21630351932772776, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2696.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 285.109375, "completions/mean_terminated_length": 285.109375, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.034133333333333335, "grad_norm": 0.7779445648193359, "learning_rate": 4.666666666666667e-06, "loss": 0.0784, "num_tokens": 7452553.0, "reward": 0.8274486064910889, "reward_std": 0.38893434405326843, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.4269818663597107, "rewards/format_reward_step": 0.94140625, "step": 32 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.109116656467407e-09, "aux_brier/mean_group_std": 0.19827492141701014, "aux_brier/mean_r": 0.6903188189204631, "aux_brier/n_active_tok": 172.375, "aux_brier/n_groups": 9.28125, "aux_brier/n_step_records": 43.09375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5199074074074074, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.6412520325203253, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.4268292682926829, "calib/gap": 0.026722222222222092, "calib/mean_conf": 0.7722439024390243, "calib/mu_c": 0.7950555555555555, "calib/mu_w": 0.7683333333333334, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.6335772357723578, "calib/std_conf": 0.24758529256998707, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 307.99609375, "completions/mean_terminated_length": 307.99609375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.0352, "grad_norm": 0.1332700252532959, "learning_rate": 4.638888888888889e-06, "loss": 0.0491, "num_tokens": 7638272.0, "reward": 0.7117406725883484, "reward_std": 0.31479260325431824, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.40165019035339355, "rewards/format_reward_step": 0.94140625, "step": 33 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.72177542898411e-09, "aux_brier/mean_group_std": 0.19156588146267975, "aux_brier/mean_r": 0.7494763104746016, "aux_brier/n_active_tok": 170.25, "aux_brier/n_groups": 9.4375, "aux_brier/n_step_records": 42.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4086677790462808, "calib/avg_num_step_conf": 5.3203125, "calib/ece": 0.5250404761904761, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.36507936507936506, "calib/gap": -0.07622919118292792, "calib/mean_conf": 0.720754761904762, "calib/mu_c": 0.6623728813559323, "calib/mu_w": 0.7386020725388602, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.5058341269841269, "calib/std_conf": 0.26430859552386954, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2141.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 275.90625, "completions/mean_terminated_length": 275.90625, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 0.03626666666666667, "grad_norm": 0.6952009201049805, "learning_rate": 4.611111111111112e-06, "loss": 0.022, "num_tokens": 7814016.0, "reward": 0.8390231132507324, "reward_std": 0.37944960594177246, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.47328001260757446, "rewards/format_reward_step": 0.97265625, "step": 34 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.980083928574075e-08, "aux_brier/mean_group_std": 0.1865629492017434, "aux_brier/mean_r": 0.7878694557961754, "aux_brier/n_active_tok": 183.625, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 45.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48602484472049684, "calib/avg_num_step_conf": 5.95703125, "calib/ece": 0.5238955823293173, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.3132530120481928, "calib/gap": -0.010384450631827025, "calib/mean_conf": 0.7017269076305221, "calib/mu_c": 0.6932608695652174, "calib/mu_w": 0.7036453201970444, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5204417670682732, "calib/std_conf": 0.27637322292922056, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 298.75390625, "completions/mean_terminated_length": 299.9255065917969, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.037333333333333336, "grad_norm": 0.36831894516944885, "learning_rate": 4.583333333333333e-06, "loss": 0.0062, "num_tokens": 7999753.0, "reward": 0.784788191318512, "reward_std": 0.33648285269737244, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.48290273547172546, "rewards/format_reward_step": 0.9609375, "step": 35 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2163590878966346e-07, "aux_brier/mean_group_std": 0.14348302719721806, "aux_brier/mean_r": 0.8458712432336049, "aux_brier/n_active_tok": 181.125, "aux_brier/n_groups": 10.09375, "aux_brier/n_step_records": 45.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47090053763440864, "calib/avg_num_step_conf": 5.671875, "calib/ece": 0.3115338645418327, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.20318725099601595, "calib/gap": -0.025331989247311837, "calib/mean_conf": 0.594601593625498, "calib/mu_c": 0.5789583333333332, "calib/mu_w": 0.6042903225806451, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2618326693227092, "calib/std_conf": 0.28520287152437157, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1725.0, "completions/max_terminated_length": 1725.0, "completions/mean_length": 303.37890625, "completions/mean_terminated_length": 304.5686340332031, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.0384, "grad_norm": 0.07049930095672607, "learning_rate": 4.555555555555556e-06, "loss": 0.0056, "num_tokens": 8180130.0, "reward": 1.0110127925872803, "reward_std": 0.43369102478027344, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6065510511398315, "rewards/format_reward_step": 0.96875, "step": 36 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.1559835892157615e-08, "aux_brier/mean_group_std": 0.13505032111760723, "aux_brier/mean_r": 0.8542272640350191, "aux_brier/n_active_tok": 185.75, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 46.4375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4597677139037433, "calib/avg_num_step_conf": 5.8125, "calib/ece": 0.32262908366533866, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.1593625498007968, "calib/gap": -0.04092299465240634, "calib/mean_conf": 0.5529884462151394, "calib/mu_c": 0.5225, "calib/mu_w": 0.5634229946524063, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31031872509960157, "calib/std_conf": 0.28159442435295023, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2341.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 293.80859375, "completions/mean_terminated_length": 293.80859375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.039466666666666664, "grad_norm": 0.12618288397789001, "learning_rate": 4.527777777777778e-06, "loss": 0.0222, "num_tokens": 8362441.0, "reward": 0.8851826190948486, "reward_std": 0.3449189364910126, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.6032304763793945, "rewards/format_reward_step": 0.96875, "step": 37 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.866049319376177e-08, "aux_brier/mean_group_std": 0.09684418597562156, "aux_brier/mean_r": 0.9022777078041829, "aux_brier/n_active_tok": 178.375, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 44.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.423245806621586, "calib/avg_num_step_conf": 5.59765625, "calib/ece": 0.314047619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.11904761904761904, "calib/gap": -0.0787617458505313, "calib/mean_conf": 0.4715079365079365, "calib/mu_c": 0.4111864406779661, "calib/mu_w": 0.4899481865284974, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2757142857142857, "calib/std_conf": 0.2780917613213121, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 674.0, "completions/max_terminated_length": 674.0, "completions/mean_length": 274.9765625, "completions/mean_terminated_length": 276.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.04053333333333333, "grad_norm": 0.293340265750885, "learning_rate": 4.5e-06, "loss": 0.0377, "num_tokens": 8539723.0, "reward": 0.8784909248352051, "reward_std": 0.32937389612197876, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.6389636993408203, "rewards/format_reward_step": 0.96875, "step": 38 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.6374711629735295e-09, "aux_brier/mean_group_std": 0.10792458710176506, "aux_brier/mean_r": 0.89967279991962, "aux_brier/n_active_tok": 175.875, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 43.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4461904761904762, "calib/avg_num_step_conf": 5.51171875, "calib/ece": 0.26904000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.1, "calib/gap": -0.06372222222222212, "calib/mean_conf": 0.46488, "calib/mu_c": 0.41900000000000004, "calib/mu_w": 0.48272222222222216, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22696000000000002, "calib/std_conf": 0.2905308685837015, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2545.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 303.01953125, "completions/mean_terminated_length": 303.01953125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.0416, "grad_norm": 0.10772345215082169, "learning_rate": 4.472222222222223e-06, "loss": 0.0361, "num_tokens": 8723384.0, "reward": 0.9172160625457764, "reward_std": 0.2951314449310303, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.6298016309738159, "rewards/format_reward_step": 0.96484375, "step": 39 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0188851054526715e-07, "aux_brier/mean_group_std": 0.07951427386641692, "aux_brier/mean_r": 0.9142084420714021, "aux_brier/n_active_tok": 211.25, "aux_brier/n_groups": 12.65625, "aux_brier/n_step_records": 52.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4294578083237877, "calib/avg_num_step_conf": 6.69140625, "calib/ece": 0.33789112903225804, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.11290322580645161, "calib/gap": -0.06076384116074851, "calib/mean_conf": 0.4654959677419355, "calib/mu_c": 0.4179629629629629, "calib/mu_w": 0.47872680412371144, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29282258064516126, "calib/std_conf": 0.28501851946032175, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2413.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 349.68359375, "completions/mean_terminated_length": 349.68359375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.042666666666666665, "grad_norm": 0.045656561851501465, "learning_rate": 4.444444444444444e-06, "loss": 0.0769, "num_tokens": 8919663.0, "reward": 0.8543899655342102, "reward_std": 0.32906851172447205, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.6441223621368408, "rewards/format_reward_step": 0.96484375, "step": 40 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.4130172093794755e-07, "aux_brier/mean_group_std": 0.08445477888794868, "aux_brier/mean_r": 0.9186786748781224, "aux_brier/n_active_tok": 181.75, "aux_brier/n_groups": 10.25, "aux_brier/n_step_records": 45.4375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48849344393898847, "calib/avg_num_step_conf": 5.90234375, "calib/ece": 0.241140562248996, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.060240963855421686, "calib/gap": -0.02732713406475773, "calib/mean_conf": 0.3743614457831326, "calib/mu_c": 0.3581188118811882, "calib/mu_w": 0.38544594594594594, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10493975903614458, "calib/std_conf": 0.26386058347081, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1574.0, "completions/max_terminated_length": 1574.0, "completions/mean_length": 295.39453125, "completions/mean_terminated_length": 297.720458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.04373333333333333, "grad_norm": 0.17622338235378265, "learning_rate": 4.416666666666667e-06, "loss": 0.0142, "num_tokens": 9102532.0, "reward": 1.0430893898010254, "reward_std": 0.3884175419807434, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6489200592041016, "rewards/format_reward_step": 0.96484375, "step": 41 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0904451541726701e-07, "aux_brier/mean_group_std": 0.06779451242606827, "aux_brier/mean_r": 0.9429386022622575, "aux_brier/n_active_tok": 176.5, "aux_brier/n_groups": 9.46875, "aux_brier/n_step_records": 44.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5357168318243387, "calib/avg_num_step_conf": 5.63671875, "calib/ece": 0.18984920634920632, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": 0.028710772082412517, "calib/mean_conf": 0.31411904761904763, "calib/mu_c": 0.33337349397590366, "calib/mu_w": 0.30466272189349114, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08730158730158731, "calib/std_conf": 0.24366650730054254, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 794.0, "completions/max_terminated_length": 794.0, "completions/mean_length": 265.8359375, "completions/mean_terminated_length": 266.8784484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.0448, "grad_norm": 0.28679654002189636, "learning_rate": 4.388888888888889e-06, "loss": 0.0123, "num_tokens": 9274954.0, "reward": 0.9861007332801819, "reward_std": 0.3326447308063507, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.7100280523300171, "rewards/format_reward_step": 0.96875, "step": 42 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1635402393656014e-07, "aux_brier/mean_group_std": 0.05950149274247848, "aux_brier/mean_r": 0.9497981332929468, "aux_brier/n_active_tok": 179.0, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 44.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4820969990461516, "calib/avg_num_step_conf": 5.609375, "calib/ece": 0.19779527559055116, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": -0.028646268985252077, "calib/mean_conf": 0.27385826771653543, "calib/mu_c": 0.2538961038961039, "calib/mu_w": 0.282542372881356, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08425196850393701, "calib/std_conf": 0.22082539239794835, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2131.0, "completions/max_terminated_length": 2131.0, "completions/mean_length": 291.54296875, "completions/mean_terminated_length": 291.54296875, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.04586666666666667, "grad_norm": 0.09002138674259186, "learning_rate": 4.361111111111112e-06, "loss": 0.0373, "num_tokens": 9454813.0, "reward": 0.9753509163856506, "reward_std": 0.2572925388813019, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.713903546333313, "rewards/format_reward_step": 0.984375, "step": 43 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.59239092087671e-07, "aux_brier/mean_group_std": 0.07996816997485257, "aux_brier/mean_r": 0.9347192221273104, "aux_brier/n_active_tok": 200.25, "aux_brier/n_groups": 11.625, "aux_brier/n_step_records": 50.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5027714789619552, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.18976111111111113, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": 0.006030687830687886, "calib/mean_conf": 0.27103253968253965, "calib/mu_c": 0.2755555555555556, "calib/mu_w": 0.2695248677248677, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1053968253968254, "calib/std_conf": 0.2476975512145346, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1915.0, "completions/max_terminated_length": 1915.0, "completions/mean_length": 332.87109375, "completions/mean_terminated_length": 332.87109375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.046933333333333334, "grad_norm": 0.09287559241056442, "learning_rate": 4.333333333333334e-06, "loss": 0.084, "num_tokens": 9646348.0, "reward": 0.9206594824790955, "reward_std": 0.26056694984436035, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7373253703117371, "rewards/format_reward_step": 0.98046875, "step": 44 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.05503563902532e-08, "aux_brier/mean_group_std": 0.055398259622085014, "aux_brier/mean_r": 0.950087883753752, "aux_brier/n_active_tok": 179.25, "aux_brier/n_groups": 10.84375, "aux_brier/n_step_records": 44.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4737741888549396, "calib/avg_num_step_conf": 5.6796875, "calib/ece": 0.22992608695652175, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.03034193219845774, "calib/mean_conf": 0.19568656126482215, "calib/mu_c": 0.17481898734177215, "calib/mu_w": 0.20516091954022989, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0566798418972332, "calib/std_conf": 0.19731349060401598, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2075.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 335.0, "completions/mean_terminated_length": 335.0, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.048, "grad_norm": 0.2698090970516205, "learning_rate": 4.305555555555556e-06, "loss": 0.0731, "num_tokens": 9837156.0, "reward": 0.9776219129562378, "reward_std": 0.3411560654640198, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.707362711429596, "rewards/format_reward_step": 0.984375, "step": 45 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.774552657156207e-07, "aux_brier/mean_group_std": 0.06672806496857805, "aux_brier/mean_r": 0.9530298777798117, "aux_brier/n_active_tok": 194.125, "aux_brier/n_groups": 13.6875, "aux_brier/n_step_records": 48.53125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4734519473241805, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.2606626984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.019389044550294193, "calib/mean_conf": 0.16370238095238096, "calib/mu_c": 0.15093023255813953, "calib/mu_w": 0.17031927710843373, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04154761904761904, "calib/std_conf": 0.17862520261767117, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2397.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 317.03125, "completions/mean_terminated_length": 318.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.04906666666666667, "grad_norm": 0.14536640048027039, "learning_rate": 4.277777777777778e-06, "loss": 0.0467, "num_tokens": 10023084.0, "reward": 0.9945597052574158, "reward_std": 0.28032350540161133, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.6891763210296631, "rewards/format_reward_step": 0.97265625, "step": 46 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.171471457205868e-07, "aux_brier/mean_group_std": 0.057716109376894524, "aux_brier/mean_r": 0.9634310785724322, "aux_brier/n_active_tok": 201.625, "aux_brier/n_groups": 12.34375, "aux_brier/n_step_records": 50.40625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4757418909592822, "calib/avg_num_step_conf": 6.53515625, "calib/ece": 0.2841701195219123, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": -0.04046770186335401, "calib/mean_conf": 0.1439573705179283, "calib/mu_c": 0.11800000000000004, "calib/mu_w": 0.15846770186335404, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0347808764940239, "calib/std_conf": 0.16426385384720288, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2012.0, "completions/max_terminated_length": 2012.0, "completions/mean_length": 327.30078125, "completions/mean_terminated_length": 328.5843200683594, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.050133333333333335, "grad_norm": 0.12283183634281158, "learning_rate": 4.25e-06, "loss": 0.0337, "num_tokens": 10212849.0, "reward": 1.002259612083435, "reward_std": 0.263668417930603, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6574758291244507, "rewards/format_reward_step": 0.97265625, "step": 47 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.894419301544218e-07, "aux_brier/mean_group_std": 0.04886702999974815, "aux_brier/mean_r": 0.9653090402978679, "aux_brier/n_active_tok": 167.375, "aux_brier/n_groups": 9.03125, "aux_brier/n_step_records": 41.84375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.43420659799970146, "calib/avg_num_step_conf": 5.2734375, "calib/ece": 0.23660956175298808, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.03421533064636516, "calib/mean_conf": 0.11709561752988049, "calib/mu_c": 0.09337662337662338, "calib/mu_w": 0.12759195402298854, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.023466135458167333, "calib/std_conf": 0.13227244167027186, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 313.046875, "completions/mean_terminated_length": 313.046875, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.0512, "grad_norm": 0.01831880211830139, "learning_rate": 4.222222222222223e-06, "loss": 0.0691, "num_tokens": 10396677.0, "reward": 0.9644834399223328, "reward_std": 0.24241739511489868, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.701683759689331, "rewards/format_reward_step": 0.9765625, "step": 48 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.7135789104893995e-06, "aux_brier/mean_group_std": 0.05474680158777261, "aux_brier/mean_r": 0.9588095690380102, "aux_brier/n_active_tok": 192.375, "aux_brier/n_groups": 11.375, "aux_brier/n_step_records": 48.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5272404263718911, "calib/avg_num_step_conf": 6.30859375, "calib/ece": 0.34338645418326696, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": -0.023503750493485992, "calib/mean_conf": 0.11071713147410359, "calib/mu_c": 0.09676470588235295, "calib/mu_w": 0.12026845637583894, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023864541832669325, "calib/std_conf": 0.14123912243607503, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 343.04296875, "completions/mean_terminated_length": 345.74407958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.05226666666666667, "grad_norm": 0.14206573367118835, "learning_rate": 4.194444444444445e-06, "loss": 0.0733, "num_tokens": 10589032.0, "reward": 1.042632818222046, "reward_std": 0.2652859091758728, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6236566305160522, "rewards/format_reward_step": 0.9765625, "step": 49 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.2484828668668513e-06, "aux_brier/mean_group_std": 0.03886498509140872, "aux_brier/mean_r": 0.9748389287583661, "aux_brier/n_active_tok": 200.25, "aux_brier/n_groups": 13.21875, "aux_brier/n_step_records": 50.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.44925458715596334, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.35617786561264814, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.01788646788990826, "calib/mean_conf": 0.09741897233201582, "calib/mu_c": 0.08723853211009175, "calib/mu_w": 0.10512500000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011383399209486165, "calib/std_conf": 0.11901850902213904, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1891.0, "completions/max_terminated_length": 1891.0, "completions/mean_length": 346.51953125, "completions/mean_terminated_length": 347.8784484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.05333333333333334, "grad_norm": 0.08137652277946472, "learning_rate": 4.166666666666667e-06, "loss": 0.0084, "num_tokens": 10783101.0, "reward": 1.070351004600525, "reward_std": 0.2891105115413666, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6095291972160339, "rewards/format_reward_step": 0.984375, "step": 50 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.0868636725346335e-06, "aux_brier/mean_group_std": 0.051500322294725044, "aux_brier/mean_r": 0.9681560346245173, "aux_brier/n_active_tok": 200.125, "aux_brier/n_groups": 13.3125, "aux_brier/n_step_records": 50.03125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5681772875274105, "calib/avg_num_step_conf": 6.27734375, "calib/ece": 0.34138559999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.016, "calib/gap": 0.0021135224931889407, "calib/mean_conf": 0.08933440000000001, "calib/mu_c": 0.09059405940594062, "calib/mu_w": 0.08848053691275168, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013359999999999999, "calib/std_conf": 0.13203527110829136, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2344.0, "completions/max_terminated_length": 2344.0, "completions/mean_length": 348.3515625, "completions/mean_terminated_length": 349.7176818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.0544, "grad_norm": 0.06175484135746956, "learning_rate": 4.138888888888889e-06, "loss": 0.041, "num_tokens": 10981575.0, "reward": 1.0389909744262695, "reward_std": 0.28017863631248474, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6247137188911438, "rewards/format_reward_step": 0.96875, "step": 51 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.2085471974328e-07, "aux_brier/mean_group_std": 0.04814929792522246, "aux_brier/mean_r": 0.9657737402298109, "aux_brier/n_active_tok": 170.875, "aux_brier/n_groups": 10.53125, "aux_brier/n_step_records": 42.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5216535433070866, "calib/avg_num_step_conf": 5.47265625, "calib/ece": 0.4419171936758893, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.003838404574428189, "calib/mean_conf": 0.0716005928853755, "calib/mu_c": 0.06968897637795277, "calib/mu_w": 0.07352738095238096, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005770750988142292, "calib/std_conf": 0.10909835143671857, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2056.0, "completions/max_terminated_length": 2056.0, "completions/mean_length": 310.57421875, "completions/mean_terminated_length": 311.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.055466666666666664, "grad_norm": 0.069333516061306, "learning_rate": 4.111111111111111e-06, "loss": -0.0201, "num_tokens": 11169034.0, "reward": 1.1242389678955078, "reward_std": 0.31043705344200134, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5438309907913208, "rewards/format_reward_step": 0.984375, "step": 52 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.706876126636672e-07, "aux_brier/mean_group_std": 0.06379028666306505, "aux_brier/mean_r": 0.9572606440168248, "aux_brier/n_active_tok": 197.25, "aux_brier/n_groups": 12.09375, "aux_brier/n_step_records": 49.3125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4921052631578947, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.3919452755905512, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": -0.011138634085213012, "calib/mean_conf": 0.09254291338582678, "calib/mu_c": 0.08640350877192984, "calib/mu_w": 0.09754214285714286, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.017834645669291337, "calib/std_conf": 0.13935218147297143, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2561.0, "completions/max_terminated_length": 2561.0, "completions/mean_length": 359.828125, "completions/mean_terminated_length": 361.2392272949219, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.05653333333333333, "grad_norm": 0.18689092993736267, "learning_rate": 4.083333333333334e-06, "loss": 0.001, "num_tokens": 11366974.0, "reward": 1.08318030834198, "reward_std": 0.2806326150894165, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5905337333679199, "rewards/format_reward_step": 0.98046875, "step": 53 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.372307485483006e-06, "aux_brier/mean_group_std": 0.07067157423924163, "aux_brier/mean_r": 0.9552103360580152, "aux_brier/n_active_tok": 163.5, "aux_brier/n_groups": 9.96875, "aux_brier/n_step_records": 40.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5120615384615385, "calib/avg_num_step_conf": 5.1328125, "calib/ece": 0.4645882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017015384615384627, "calib/mean_conf": 0.05337254901960785, "calib/mu_c": 0.05253846153846154, "calib/mu_w": 0.054240000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00407843137254902, "calib/std_conf": 0.06479226557094778, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1790.0, "completions/max_terminated_length": 1790.0, "completions/mean_length": 284.7890625, "completions/mean_terminated_length": 284.7890625, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.0576, "grad_norm": 0.09624961018562317, "learning_rate": 4.055555555555556e-06, "loss": 0.0054, "num_tokens": 11546112.0, "reward": 1.139514684677124, "reward_std": 0.2729540169239044, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5346214771270752, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.311708374199474e-06, "aux_brier/mean_group_std": 0.0666999291595621, "aux_brier/mean_r": 0.9573599392167562, "aux_brier/n_active_tok": 179.25, "aux_brier/n_groups": 11.75, "aux_brier/n_step_records": 44.8125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4435973597359736, "calib/avg_num_step_conf": 5.86328125, "calib/ece": 0.3454223107569721, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.0006921452145214474, "calib/mean_conf": 0.06270517928286853, "calib/mu_c": 0.0631188118811881, "calib/mu_w": 0.06242666666666666, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0028685258964143423, "calib/std_conf": 0.08411072628365464, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2354.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 335.140625, "completions/mean_terminated_length": 336.4549255371094, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.058666666666666666, "grad_norm": 0.030210435390472412, "learning_rate": 4.027777777777779e-06, "loss": 0.0438, "num_tokens": 11739732.0, "reward": 1.0322225093841553, "reward_std": 0.27241966128349304, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6132650375366211, "rewards/format_reward_step": 0.96875, "step": 55 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.866932890228547e-06, "aux_brier/mean_group_std": 0.057494094171987906, "aux_brier/mean_r": 0.9651561452353414, "aux_brier/n_active_tok": 189.75, "aux_brier/n_groups": 11.96875, "aux_brier/n_step_records": 47.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5274398220826793, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.340405905511811, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.007065293040293026, "calib/mean_conf": 0.06069645669291338, "calib/mu_c": 0.05635714285714286, "calib/mu_w": 0.06342243589743589, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0076377952755905506, "calib/std_conf": 0.08014792443098312, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1010.0, "completions/max_terminated_length": 1010.0, "completions/mean_length": 334.88671875, "completions/mean_terminated_length": 336.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.05973333333333333, "grad_norm": 0.03248266503214836, "learning_rate": 4.000000000000001e-06, "loss": -0.0052, "num_tokens": 11932303.0, "reward": 1.0395299196243286, "reward_std": 0.2998000979423523, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6424946784973145, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1238458076101576e-05, "aux_brier/mean_group_std": 0.035071457082858415, "aux_brier/mean_r": 0.9781422398987716, "aux_brier/n_active_tok": 196.875, "aux_brier/n_groups": 12.90625, "aux_brier/n_step_records": 49.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4735983945817132, "calib/avg_num_step_conf": 6.2578125, "calib/ece": 0.4757865612648221, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003059074376019079, "calib/mean_conf": 0.059707509881422934, "calib/mu_c": 0.058268656716417906, "calib/mu_w": 0.061327731092436985, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002924901185770751, "calib/std_conf": 0.06811790393782177, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 355.015625, "completions/mean_terminated_length": 355.015625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.0608, "grad_norm": 0.12113117426633835, "learning_rate": 3.972222222222223e-06, "loss": 0.069, "num_tokens": 12129979.0, "reward": 1.1440820693969727, "reward_std": 0.2719724476337433, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5138286352157593, "rewards/format_reward_step": 0.984375, "step": 57 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.475250295123747e-07, "aux_brier/mean_group_std": 0.05221701207752745, "aux_brier/mean_r": 0.9712742272457991, "aux_brier/n_active_tok": 199.125, "aux_brier/n_groups": 14.5625, "aux_brier/n_step_records": 49.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48925667828106845, "calib/avg_num_step_conf": 6.50390625, "calib/ece": 0.27565355999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00415744628339141, "calib/mean_conf": 0.05234644, "calib/mu_c": 0.05514024390243903, "calib/mu_w": 0.05098279761904762, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.046329434139933116, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2487.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 392.66796875, "completions/mean_terminated_length": 394.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.06186666666666667, "grad_norm": 0.1359066367149353, "learning_rate": 3.944444444444445e-06, "loss": 0.0211, "num_tokens": 12336822.0, "reward": 0.9744365215301514, "reward_std": 0.2868717908859253, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6789959669113159, "rewards/format_reward_step": 0.96875, "step": 58 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.741232379590343e-06, "aux_brier/mean_group_std": 0.03873162633294896, "aux_brier/mean_r": 0.9730663895053515, "aux_brier/n_active_tok": 209.75, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 52.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4612823439878235, "calib/avg_num_step_conf": 6.60546875, "calib/ece": 0.37555905511811016, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0075272704211060415, "calib/mean_conf": 0.05766929133858267, "calib/mu_c": 0.053342592592592594, "calib/mu_w": 0.060869863013698636, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004015748031496063, "calib/std_conf": 0.04586999003828072, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2593.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 368.3515625, "completions/mean_terminated_length": 368.3515625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.06293333333333333, "grad_norm": 0.021179357543587685, "learning_rate": 3.916666666666667e-06, "loss": 0.0493, "num_tokens": 12537368.0, "reward": 1.0675222873687744, "reward_std": 0.27865922451019287, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6060266494750977, "rewards/format_reward_step": 0.98828125, "step": 59 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.3851988523994052e-06, "aux_brier/mean_group_std": 0.044724947384388106, "aux_brier/mean_r": 0.9742053092633978, "aux_brier/n_active_tok": 196.75, "aux_brier/n_groups": 13.9375, "aux_brier/n_step_records": 49.1875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4127853278796675, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.38225301204819273, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014807824251220472, "calib/mean_conf": 0.051192771084337355, "calib/mu_c": 0.04268867924528302, "calib/mu_w": 0.057496503496503495, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0038714859437751, "calib/std_conf": 0.04604401364697368, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2448.0, "completions/max_terminated_length": 2448.0, "completions/mean_length": 367.11328125, "completions/mean_terminated_length": 367.11328125, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.064, "grad_norm": 0.3024795651435852, "learning_rate": 3.88888888888889e-06, "loss": 0.1115, "num_tokens": 12740205.0, "reward": 1.0419573783874512, "reward_std": 0.3243491053581238, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5818923711776733, "rewards/format_reward_step": 0.96484375, "step": 60 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.6845017756516043e-06, "aux_brier/mean_group_std": 0.050071488925600725, "aux_brier/mean_r": 0.9648921357151992, "aux_brier/n_active_tok": 166.875, "aux_brier/n_groups": 10.25, "aux_brier/n_step_records": 41.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5217404983639566, "calib/avg_num_step_conf": 5.37890625, "calib/ece": 0.49561660079051384, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030678328718852163, "calib/mean_conf": 0.04588537549407115, "calib/mu_c": 0.047291970802919706, "calib/mu_w": 0.04422413793103449, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03021043951499289, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 322.0390625, "completions/mean_terminated_length": 322.0390625, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.06506666666666666, "grad_norm": 0.04219770058989525, "learning_rate": 3.861111111111112e-06, "loss": 0.0296, "num_tokens": 12926711.0, "reward": 1.1544866561889648, "reward_std": 0.19616234302520752, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5007594227790833, "rewards/format_reward_step": 0.98828125, "step": 61 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.854852538035745e-06, "aux_brier/mean_group_std": 0.04710899009756953, "aux_brier/mean_r": 0.9690258826150502, "aux_brier/n_active_tok": 201.625, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 50.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5359795134443022, "calib/avg_num_step_conf": 6.44140625, "calib/ece": 0.38956111111111114, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00437933418693983, "calib/mean_conf": 0.046946825396825397, "calib/mu_c": 0.04941454545454546, "calib/mu_w": 0.04503521126760563, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.032727376013294215, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2117.0, "completions/max_terminated_length": 2117.0, "completions/mean_length": 366.01953125, "completions/mean_terminated_length": 366.01953125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.06613333333333334, "grad_norm": 0.3728078007698059, "learning_rate": 3.833333333333334e-06, "loss": 0.0011, "num_tokens": 13127492.0, "reward": 1.071333885192871, "reward_std": 0.27668479084968567, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5900229811668396, "rewards/format_reward_step": 0.98046875, "step": 62 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2460958116813914e-06, "aux_brier/mean_group_std": 0.05956475319454775, "aux_brier/mean_r": 0.961629036896829, "aux_brier/n_active_tok": 186.125, "aux_brier/n_groups": 12.34375, "aux_brier/n_step_records": 46.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5489032498601878, "calib/avg_num_step_conf": 6.08203125, "calib/ece": 0.4316338582677165, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002031628658422921, "calib/mean_conf": 0.04687007874015748, "calib/mu_c": 0.047933884297520664, "calib/mu_w": 0.04590225563909774, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010629921259842521, "calib/std_conf": 0.03287405653257879, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 352.8515625, "completions/mean_terminated_length": 354.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.0672, "grad_norm": 0.3791986107826233, "learning_rate": 3.8055555555555556e-06, "loss": 0.0334, "num_tokens": 13326462.0, "reward": 1.1101253032684326, "reward_std": 0.26827260851860046, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5576891303062439, "rewards/format_reward_step": 0.98828125, "step": 63 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.967570181811041e-06, "aux_brier/mean_group_std": 0.04056971406073933, "aux_brier/mean_r": 0.9616810714876558, "aux_brier/n_active_tok": 193.125, "aux_brier/n_groups": 12.34375, "aux_brier/n_step_records": 48.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.42913798754506727, "calib/avg_num_step_conf": 6.09765625, "calib/ece": 0.41231975806451615, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006491871517535233, "calib/mean_conf": 0.04493830645161291, "calib/mu_c": 0.041404424778761065, "calib/mu_w": 0.0478962962962963, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008064516129032258, "calib/std_conf": 0.029575978298159205, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2403.0, "completions/max_terminated_length": 2403.0, "completions/mean_length": 359.21484375, "completions/mean_terminated_length": 360.6235656738281, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.06826666666666667, "grad_norm": 0.3696114718914032, "learning_rate": 3.777777777777778e-06, "loss": 0.0661, "num_tokens": 13522197.0, "reward": 1.066054344177246, "reward_std": 0.2907637059688568, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5610923767089844, "rewards/format_reward_step": 0.96875, "step": 64 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.113235367253498e-06, "aux_brier/mean_group_std": 0.04308684222143899, "aux_brier/mean_r": 0.9679355211739188, "aux_brier/n_active_tok": 167.125, "aux_brier/n_groups": 9.9375, "aux_brier/n_step_records": 41.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.57240083764474, "calib/avg_num_step_conf": 5.30859375, "calib/ece": 0.4439882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.004021988174427199, "calib/mean_conf": 0.05012941176470589, "calib/mu_c": 0.05221138211382114, "calib/mu_w": 0.04818939393939394, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0058823529411764705, "calib/std_conf": 0.08752986559022134, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 307.1640625, "completions/mean_terminated_length": 307.1640625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.06933333333333333, "grad_norm": 0.3415069878101349, "learning_rate": 3.7500000000000005e-06, "loss": 0.0529, "num_tokens": 13705855.0, "reward": 1.1118378639221191, "reward_std": 0.2020946443080902, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5489141345024109, "rewards/format_reward_step": 0.98828125, "step": 65 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.2503346324183084e-06, "aux_brier/mean_group_std": 0.040809763049596604, "aux_brier/mean_r": 0.975717045438534, "aux_brier/n_active_tok": 220.125, "aux_brier/n_groups": 16.125, "aux_brier/n_step_records": 55.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5343137254901962, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.3531525691699604, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.000988130718954247, "calib/mean_conf": 0.05166956521739131, "calib/mu_c": 0.051072000000000006, "calib/mu_w": 0.05206013071895425, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004782608695652174, "calib/std_conf": 0.06744616520015219, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 414.08984375, "completions/mean_terminated_length": 415.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.0704, "grad_norm": 0.48906105756759644, "learning_rate": 3.7222222222222225e-06, "loss": 0.0555, "num_tokens": 13918214.0, "reward": 1.037306308746338, "reward_std": 0.20587214827537537, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6257877349853516, "rewards/format_reward_step": 0.98046875, "step": 66 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.1599211185986391e-06, "aux_brier/mean_group_std": 0.06801118077395583, "aux_brier/mean_r": 0.9610036007316717, "aux_brier/n_active_tok": 181.875, "aux_brier/n_groups": 11.0625, "aux_brier/n_step_records": 45.46875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.525, "calib/avg_num_step_conf": 5.88671875, "calib/ece": 0.464990625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003204786324786331, "calib/mean_conf": 0.042821875, "calib/mu_c": 0.044399230769230774, "calib/mu_w": 0.04119444444444444, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.031134633141557573, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1179.0, "completions/max_terminated_length": 1179.0, "completions/mean_length": 356.78515625, "completions/mean_terminated_length": 358.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.07146666666666666, "grad_norm": 0.19877856969833374, "learning_rate": 3.694444444444445e-06, "loss": -0.0081, "num_tokens": 14114559.0, "reward": 1.1414318084716797, "reward_std": 0.18996474146842957, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5344774127006531, "rewards/format_reward_step": 1.0, "step": 67 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.404874281467187e-07, "aux_brier/mean_group_std": 0.03332953893440229, "aux_brier/mean_r": 0.977619805886587, "aux_brier/n_active_tok": 178.875, "aux_brier/n_groups": 11.34375, "aux_brier/n_step_records": 44.71875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49225522692503826, "calib/avg_num_step_conf": 5.7109375, "calib/ece": 0.3791606299212598, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005195002549719531, "calib/mean_conf": 0.042177952755905516, "calib/mu_c": 0.03915094339622641, "calib/mu_w": 0.044345945945945944, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0020078740157480316, "calib/std_conf": 0.038865171569968944, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2495.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 354.2890625, "completions/mean_terminated_length": 354.2890625, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.07253333333333334, "grad_norm": 0.1947488635778427, "learning_rate": 3.6666666666666666e-06, "loss": 0.0681, "num_tokens": 14309345.0, "reward": 1.0590476989746094, "reward_std": 0.2134093940258026, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.603378415107727, "rewards/format_reward_step": 0.98828125, "step": 68 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.199222195666046e-07, "aux_brier/mean_group_std": 0.04567901826417099, "aux_brier/mean_r": 0.9651829820279053, "aux_brier/n_active_tok": 204.375, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 51.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49064243909078536, "calib/avg_num_step_conf": 6.421875, "calib/ece": 0.3308486055776892, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005765346399891104, "calib/mean_conf": 0.047876494023904384, "calib/mu_c": 0.04424731182795699, "calib/mu_w": 0.050012658227848096, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004103585657370518, "calib/std_conf": 0.047888697787222614, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 418.5, "completions/mean_terminated_length": 420.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.0736, "grad_norm": 0.0591975674033165, "learning_rate": 3.638888888888889e-06, "loss": 0.0686, "num_tokens": 14520977.0, "reward": 1.0121474266052246, "reward_std": 0.2417616844177246, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6423400044441223, "rewards/format_reward_step": 0.9765625, "step": 69 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.9151696961402536e-06, "aux_brier/mean_group_std": 0.03618447172246309, "aux_brier/mean_r": 0.9738672651371714, "aux_brier/n_active_tok": 207.125, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 51.78125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5128526858383071, "calib/avg_num_step_conf": 6.8125, "calib/ece": 0.3534248554216868, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000872051885512759, "calib/mean_conf": 0.04135426104417671, "calib/mu_c": 0.04188659793814434, "calib/mu_w": 0.04101454605263158, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002610441767068273, "calib/std_conf": 0.03730413259189743, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2680.0, "completions/max_terminated_length": 2680.0, "completions/mean_length": 392.6875, "completions/mean_terminated_length": 395.779541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.07466666666666667, "grad_norm": 0.3551815450191498, "learning_rate": 3.6111111111111115e-06, "loss": 0.0917, "num_tokens": 14728497.0, "reward": 1.0179234743118286, "reward_std": 0.25442177057266235, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.618569016456604, "rewards/format_reward_step": 0.96875, "step": 70 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.852666630363657e-06, "aux_brier/mean_group_std": 0.04797432748809096, "aux_brier/mean_r": 0.9620673853485948, "aux_brier/n_active_tok": 205.5, "aux_brier/n_groups": 14.5625, "aux_brier/n_step_records": 51.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5415628394351798, "calib/avg_num_step_conf": 6.609375, "calib/ece": 0.4114468253968255, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.007985547249377035, "calib/mean_conf": 0.040696031746031744, "calib/mu_c": 0.03622792792792793, "calib/mu_w": 0.044213475177304964, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005833333333333333, "calib/std_conf": 0.06686747305309905, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2732.0, "completions/max_terminated_length": 2732.0, "completions/mean_length": 419.1484375, "completions/mean_terminated_length": 419.1484375, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.07573333333333333, "grad_norm": 0.21635693311691284, "learning_rate": 3.5833333333333335e-06, "loss": 0.0365, "num_tokens": 14940207.0, "reward": 1.063964605331421, "reward_std": 0.2941199243068695, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5683585405349731, "rewards/format_reward_step": 0.9765625, "step": 71 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2753528566555872e-08, "aux_brier/mean_group_std": 0.028263059452835318, "aux_brier/mean_r": 0.978569870939542, "aux_brier/n_active_tok": 227.75, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 56.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5464475683890578, "calib/avg_num_step_conf": 7.21484375, "calib/ece": 0.41105533596837945, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011850937183384008, "calib/mean_conf": 0.03368774703557312, "calib/mu_c": 0.03434821428571429, "calib/mu_w": 0.03316312056737589, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0010276679841897233, "calib/std_conf": 0.019322960600969042, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 402.0625, "completions/mean_terminated_length": 402.0625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.0768, "grad_norm": 0.03792320564389229, "learning_rate": 3.555555555555556e-06, "loss": 0.0277, "num_tokens": 15147543.0, "reward": 1.08038330078125, "reward_std": 0.21755686402320862, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5793453454971313, "rewards/format_reward_step": 0.98828125, "step": 72 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.6318656402013474e-06, "aux_brier/mean_group_std": 0.04297948343048164, "aux_brier/mean_r": 0.9734220111834818, "aux_brier/n_active_tok": 183.5, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 45.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5783283283283284, "calib/avg_num_step_conf": 6.01171875, "calib/ece": 0.532210443137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0059946178678678685, "calib/mean_conf": 0.032495439215686275, "calib/mu_c": 0.035104861111111114, "calib/mu_w": 0.029110243243243246, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025879877857153948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1021.0, "completions/max_terminated_length": 1021.0, "completions/mean_length": 361.6796875, "completions/mean_terminated_length": 363.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.07786666666666667, "grad_norm": 0.12629614770412445, "learning_rate": 3.5277777777777784e-06, "loss": 0.003, "num_tokens": 15347165.0, "reward": 1.1754639148712158, "reward_std": 0.2581225633621216, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.4674806296825409, "rewards/format_reward_step": 0.9921875, "step": 73 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5335723340115237e-06, "aux_brier/mean_group_std": 0.020959688280528344, "aux_brier/mean_r": 0.9875021147358175, "aux_brier/n_active_tok": 211.0, "aux_brier/n_groups": 12.46875, "aux_brier/n_step_records": 52.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5358246609902239, "calib/avg_num_step_conf": 6.671875, "calib/ece": 0.37884765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0022378429517502275, "calib/mean_conf": 0.03130859375, "calib/mu_c": 0.03262857142857142, "calib/mu_w": 0.030390728476821196, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.017500069318361598, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 367.82421875, "completions/mean_terminated_length": 369.2666931152344, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.07893333333333333, "grad_norm": 0.06852582097053528, "learning_rate": 3.5e-06, "loss": 0.0083, "num_tokens": 15545256.0, "reward": 1.0590846538543701, "reward_std": 0.1917310357093811, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6113389134407043, "rewards/format_reward_step": 0.9921875, "step": 74 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.953110997712006e-06, "aux_brier/mean_group_std": 0.07335513578920866, "aux_brier/mean_r": 0.9510220788206281, "aux_brier/n_active_tok": 191.625, "aux_brier/n_groups": 10.84375, "aux_brier/n_step_records": 47.90625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5958255534809282, "calib/avg_num_step_conf": 6.1015625, "calib/ece": 0.6092466666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035297879434515803, "calib/mean_conf": 0.03161607843137255, "calib/mu_c": 0.03288957055214723, "calib/mu_w": 0.02935978260869565, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008235294117647059, "calib/std_conf": 0.01631417332530111, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 989.0, "completions/max_terminated_length": 989.0, "completions/mean_length": 346.22265625, "completions/mean_terminated_length": 347.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.08, "grad_norm": 0.10428853332996368, "learning_rate": 3.4722222222222224e-06, "loss": 0.0037, "num_tokens": 15738641.0, "reward": 1.2318360805511475, "reward_std": 0.22782982885837555, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.39609429240226746, "rewards/format_reward_step": 0.9921875, "step": 75 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.548702398188276e-06, "aux_brier/mean_group_std": 0.05644470769181853, "aux_brier/mean_r": 0.9652043977565967, "aux_brier/n_active_tok": 232.5, "aux_brier/n_groups": 17.28125, "aux_brier/n_step_records": 58.125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5240878725590956, "calib/avg_num_step_conf": 7.578125, "calib/ece": 0.5177649402390438, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00103558581706064, "calib/mean_conf": 0.03601992031872509, "calib/mu_c": 0.03648201438848921, "calib/mu_w": 0.03544642857142857, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.017588127605375554, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2884.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 405.8203125, "completions/mean_terminated_length": 407.4117736816406, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.08106666666666666, "grad_norm": 0.20300698280334473, "learning_rate": 3.444444444444445e-06, "loss": 0.0085, "num_tokens": 15945587.0, "reward": 1.149160385131836, "reward_std": 0.24240437150001526, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.47164177894592285, "rewards/format_reward_step": 0.9765625, "step": 76 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.4597693375523235e-06, "aux_brier/mean_group_std": 0.05872090048024422, "aux_brier/mean_r": 0.9624783841273407, "aux_brier/n_active_tok": 215.625, "aux_brier/n_groups": 16.625, "aux_brier/n_step_records": 53.90625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4822032795419052, "calib/avg_num_step_conf": 7.0, "calib/ece": 0.5135333333333334, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005769521082769416, "calib/mean_conf": 0.032651405622489964, "calib/mu_c": 0.03291323529411765, "calib/mu_w": 0.03233628318584071, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022713129971992565, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2749.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 387.3203125, "completions/mean_terminated_length": 388.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.08213333333333334, "grad_norm": 0.07579105347394943, "learning_rate": 3.416666666666667e-06, "loss": 0.0585, "num_tokens": 16149405.0, "reward": 1.1362875699996948, "reward_std": 0.2781468331813812, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.4748378396034241, "rewards/format_reward_step": 0.97265625, "step": 77 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.3853375112655755e-06, "aux_brier/mean_group_std": 0.058701462720420934, "aux_brier/mean_r": 0.9580716076366684, "aux_brier/n_active_tok": 231.0, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 57.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5604399087221096, "calib/avg_num_step_conf": 7.4140625, "calib/ece": 0.4303507936507936, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003111511156186606, "calib/mean_conf": 0.030919047619047617, "calib/mu_c": 0.03259827586206896, "calib/mu_w": 0.029486764705882355, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00047619047619047614, "calib/std_conf": 0.016665917670244963, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2415.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 447.8984375, "completions/mean_terminated_length": 449.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.0832, "grad_norm": 0.16607753932476044, "learning_rate": 3.3888888888888893e-06, "loss": 0.0852, "num_tokens": 16372091.0, "reward": 1.0822772979736328, "reward_std": 0.26324495673179626, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5556716918945312, "rewards/format_reward_step": 0.98046875, "step": 78 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.567880250659016e-07, "aux_brier/mean_group_std": 0.046318595736745447, "aux_brier/mean_r": 0.966572659793065, "aux_brier/n_active_tok": 229.0, "aux_brier/n_groups": 15.84375, "aux_brier/n_step_records": 57.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5549912761714856, "calib/avg_num_step_conf": 7.3046875, "calib/ece": 0.5063656299212598, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.0013733636590229498, "calib/mean_conf": 0.0380438188976378, "calib/mu_c": 0.03868183823529412, "calib/mu_w": 0.03730847457627117, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004488188976377953, "calib/std_conf": 0.07996638440387699, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2691.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 433.921875, "completions/mean_terminated_length": 433.921875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.08426666666666667, "grad_norm": 0.012332778424024582, "learning_rate": 3.3611111111111117e-06, "loss": 0.0101, "num_tokens": 16589551.0, "reward": 1.1548140048980713, "reward_std": 0.2139796018600464, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.49425625801086426, "rewards/format_reward_step": 0.9921875, "step": 79 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.043303556497627e-07, "aux_brier/mean_group_std": 0.07608587225960624, "aux_brier/mean_r": 0.9550416989188162, "aux_brier/n_active_tok": 215.25, "aux_brier/n_groups": 13.9375, "aux_brier/n_step_records": 53.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.513432364096081, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.5229090909090909, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002242667509481662, "calib/mean_conf": 0.032980237154150195, "calib/mu_c": 0.03197857142857143, "calib/mu_w": 0.034221238938053095, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012648221343873518, "calib/std_conf": 0.022552959297313678, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2733.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 388.55078125, "completions/mean_terminated_length": 388.55078125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.08533333333333333, "grad_norm": 0.053818900138139725, "learning_rate": 3.3333333333333333e-06, "loss": 0.0199, "num_tokens": 16791180.0, "reward": 1.1577060222625732, "reward_std": 0.23924782872200012, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.47457432746887207, "rewards/format_reward_step": 0.984375, "step": 80 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.621482094401385e-06, "aux_brier/mean_group_std": 0.05353568207670549, "aux_brier/mean_r": 0.9641680840652858, "aux_brier/n_active_tok": 236.375, "aux_brier/n_groups": 15.53125, "aux_brier/n_step_records": 59.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4881942240558552, "calib/avg_num_step_conf": 7.69140625, "calib/ece": 0.5121289682539683, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010155188828943167, "calib/mean_conf": 0.031521825396825395, "calib/mu_c": 0.03105839416058394, "calib/mu_w": 0.03207391304347826, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.013320474448197003, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 437.00390625, "completions/mean_terminated_length": 438.7176818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.0864, "grad_norm": 0.17115481197834015, "learning_rate": 3.3055555555555558e-06, "loss": 0.038, "num_tokens": 17009301.0, "reward": 1.144742727279663, "reward_std": 0.2484113574028015, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.47740817070007324, "rewards/format_reward_step": 0.98046875, "step": 81 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.319326978423192e-07, "aux_brier/mean_group_std": 0.05801051575384863, "aux_brier/mean_r": 0.9620548690289261, "aux_brier/n_active_tok": 199.625, "aux_brier/n_groups": 11.09375, "aux_brier/n_step_records": 49.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5533769063180828, "calib/avg_num_step_conf": 6.30859375, "calib/ece": 0.5019413385826772, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.004214049175225651, "calib/mean_conf": 0.037035039370078744, "calib/mu_c": 0.03506074074074074, "calib/mu_w": 0.03927478991596639, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0037401574803149606, "calib/std_conf": 0.05940848775984653, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1910.0, "completions/max_terminated_length": 1910.0, "completions/mean_length": 373.4921875, "completions/mean_terminated_length": 373.4921875, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.08746666666666666, "grad_norm": 0.009562434628605843, "learning_rate": 3.277777777777778e-06, "loss": 0.0269, "num_tokens": 17210467.0, "reward": 1.1426995992660522, "reward_std": 0.257326602935791, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.49267327785491943, "rewards/format_reward_step": 0.984375, "step": 82 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.0656154593456932e-06, "aux_brier/mean_group_std": 0.06109657630213634, "aux_brier/mean_r": 0.9554997579037448, "aux_brier/n_active_tok": 256.875, "aux_brier/n_groups": 18.90625, "aux_brier/n_step_records": 64.21875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5174933862433863, "calib/avg_num_step_conf": 8.31640625, "calib/ece": 0.5115627125506074, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00022476521164021407, "calib/mean_conf": 0.03499599190283401, "calib/mu_c": 0.03489407407407407, "calib/mu_w": 0.035118839285714284, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.014265005187660572, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 493.265625, "completions/mean_terminated_length": 497.14959716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.08853333333333334, "grad_norm": 0.8394145369529724, "learning_rate": 3.2500000000000002e-06, "loss": 0.0394, "num_tokens": 17444007.0, "reward": 1.1201465129852295, "reward_std": 0.21937991678714752, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.46496108174324036, "rewards/format_reward_step": 0.953125, "step": 83 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.553298743134818e-07, "aux_brier/mean_group_std": 0.06533469957955927, "aux_brier/mean_r": 0.9503936504809598, "aux_brier/n_active_tok": 220.5, "aux_brier/n_groups": 14.4375, "aux_brier/n_step_records": 55.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4640179910044977, "calib/avg_num_step_conf": 7.12890625, "calib/ece": 0.5083984251968503, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002682433783108437, "calib/mean_conf": 0.035696062992125986, "calib/mu_c": 0.034471014492753624, "calib/mu_w": 0.03715344827586206, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.014815569069805376, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2615.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 396.421875, "completions/mean_terminated_length": 397.97650146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0896, "grad_norm": 0.22569727897644043, "learning_rate": 3.2222222222222227e-06, "loss": 0.0202, "num_tokens": 17651411.0, "reward": 1.1573580503463745, "reward_std": 0.19223107397556305, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.48880699276924133, "rewards/format_reward_step": 0.9921875, "step": 84 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8013977368391831e-06, "aux_brier/mean_group_std": 0.06041905749317108, "aux_brier/mean_r": 0.9559718294778211, "aux_brier/n_active_tok": 230.875, "aux_brier/n_groups": 13.78125, "aux_brier/n_step_records": 57.71875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5632520325203252, "calib/avg_num_step_conf": 7.59375, "calib/ece": 0.4670165322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003436260162601637, "calib/mean_conf": 0.03701572580645161, "calib/mu_c": 0.03872000000000001, "calib/mu_w": 0.035283739837398374, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013140731535179334, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 436.140625, "completions/mean_terminated_length": 443.0635070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.09066666666666667, "grad_norm": 0.5455954670906067, "learning_rate": 3.1944444444444443e-06, "loss": 0.0458, "num_tokens": 17870887.0, "reward": 1.0959954261779785, "reward_std": 0.25687992572784424, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5089819431304932, "rewards/format_reward_step": 0.9609375, "step": 85 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5122751533813705e-06, "aux_brier/mean_group_std": 0.0623507501053325, "aux_brier/mean_r": 0.95422815722706, "aux_brier/n_active_tok": 211.75, "aux_brier/n_groups": 14.4375, "aux_brier/n_step_records": 52.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5684009516654144, "calib/avg_num_step_conf": 6.98046875, "calib/ece": 0.483765744400527, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020004132231404848, "calib/mean_conf": 0.04060843214756259, "calib/mu_c": 0.041565151515151506, "calib/mu_w": 0.03956473829201102, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0013175230566534913, "calib/std_conf": 0.02433384136945757, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 421.2109375, "completions/mean_terminated_length": 424.5275573730469, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.09173333333333333, "grad_norm": 0.16058893501758575, "learning_rate": 3.1666666666666667e-06, "loss": 0.0206, "num_tokens": 18084229.0, "reward": 1.1380919218063354, "reward_std": 0.23005223274230957, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5133053660392761, "rewards/format_reward_step": 0.98828125, "step": 86 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.670456863695936e-07, "aux_brier/mean_group_std": 0.07791156166880914, "aux_brier/mean_r": 0.9439562510298461, "aux_brier/n_active_tok": 206.75, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 51.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5460416666666666, "calib/avg_num_step_conf": 6.671875, "calib/ece": 0.6001550000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002374999999999995, "calib/mean_conf": 0.039845, "calib/mu_c": 0.0407, "calib/mu_w": 0.038325000000000005, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.016022616047325106, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2618.0, "completions/max_terminated_length": 2618.0, "completions/mean_length": 390.78125, "completions/mean_terminated_length": 393.8582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.0928, "grad_norm": 0.09765290468931198, "learning_rate": 3.138888888888889e-06, "loss": 0.044, "num_tokens": 18289765.0, "reward": 1.2153167724609375, "reward_std": 0.24007631838321686, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.400330126285553, "rewards/format_reward_step": 0.97265625, "step": 87 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4889353256164473e-06, "aux_brier/mean_group_std": 0.06597210251909351, "aux_brier/mean_r": 0.9570897658892974, "aux_brier/n_active_tok": 247.5, "aux_brier/n_groups": 17.625, "aux_brier/n_step_records": 61.875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6568800156586416, "calib/avg_num_step_conf": 7.91015625, "calib/ece": 0.4857814516129032, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.002216337182749377, "calib/mean_conf": 0.050275000000000014, "calib/mu_c": 0.05132061068702289, "calib/mu_w": 0.049104273504273516, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003915322580645161, "calib/std_conf": 0.06167137429357372, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2746.0, "completions/max_terminated_length": 2746.0, "completions/mean_length": 482.83984375, "completions/mean_terminated_length": 482.83984375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.09386666666666667, "grad_norm": 0.007360141258686781, "learning_rate": 3.1111111111111116e-06, "loss": 0.1211, "num_tokens": 18523220.0, "reward": 1.122927188873291, "reward_std": 0.24178332090377808, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.4995215833187103, "rewards/format_reward_step": 0.96484375, "step": 88 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.395145387751768e-07, "aux_brier/mean_group_std": 0.054611773308866046, "aux_brier/mean_r": 0.9631183750878994, "aux_brier/n_active_tok": 228.125, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 57.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5369155844155845, "calib/avg_num_step_conf": 7.921875, "calib/ece": 0.3896452, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011148051948051962, "calib/mean_conf": 0.0503548, "calib/mu_c": 0.05097909090909091, "calib/mu_w": 0.049864285714285715, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.021703103855439665, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2491.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 439.375, "completions/mean_terminated_length": 444.5849914550781, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.09493333333333333, "grad_norm": 0.10169034451246262, "learning_rate": 3.0833333333333336e-06, "loss": -0.0015, "num_tokens": 18744588.0, "reward": 1.0649060010910034, "reward_std": 0.23866789042949677, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5877489447593689, "rewards/format_reward_step": 0.9765625, "step": 89 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.2062817567891635e-06, "aux_brier/mean_group_std": 0.09117741198873634, "aux_brier/mean_r": 0.9286967400176888, "aux_brier/n_active_tok": 215.5, "aux_brier/n_groups": 13.125, "aux_brier/n_step_records": 53.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5315340211250307, "calib/avg_num_step_conf": 6.89453125, "calib/ece": 0.48888281250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002046917219356424, "calib/mean_conf": 0.05017968750000001, "calib/mu_c": 0.0511231884057971, "calib/mu_w": 0.04907627118644067, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.020003684658140956, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 403.39453125, "completions/mean_terminated_length": 404.97650146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.096, "grad_norm": 0.03674687072634697, "learning_rate": 3.055555555555556e-06, "loss": 0.0449, "num_tokens": 18951177.0, "reward": 1.1614923477172852, "reward_std": 0.2281077802181244, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5053443908691406, "rewards/format_reward_step": 0.9921875, "step": 90 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.253700443603627e-07, "aux_brier/mean_group_std": 0.05518224027551509, "aux_brier/mean_r": 0.9644409634664668, "aux_brier/n_active_tok": 236.25, "aux_brier/n_groups": 16.875, "aux_brier/n_step_records": 59.0625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5720764384130721, "calib/avg_num_step_conf": 8.7109375, "calib/ece": 0.5293606557377049, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00454780862701655, "calib/mean_conf": 0.05670491803278689, "calib/mu_c": 0.05858741258741259, "calib/mu_w": 0.05403960396039604, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.019107199108717287, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 452.87890625, "completions/mean_terminated_length": 460.0674743652344, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.09706666666666666, "grad_norm": 0.06745661795139313, "learning_rate": 3.0277777777777776e-06, "loss": 0.0254, "num_tokens": 19174826.0, "reward": 1.149299144744873, "reward_std": 0.19182530045509338, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.45657166838645935, "rewards/format_reward_step": 0.953125, "step": 91 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.7157158948318845e-06, "aux_brier/mean_group_std": 0.04431406446187951, "aux_brier/mean_r": 0.9749018104343687, "aux_brier/n_active_tok": 210.625, "aux_brier/n_groups": 12.75, "aux_brier/n_step_records": 52.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5638138138138139, "calib/avg_num_step_conf": 6.8984375, "calib/ece": 0.5035749019607844, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007602439939939948, "calib/mean_conf": 0.06113098039215686, "calib/mu_c": 0.06444027777777778, "calib/mu_w": 0.05683783783783783, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03487146061904344, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 387.64453125, "completions/mean_terminated_length": 389.16473388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.09813333333333334, "grad_norm": 0.05189352482557297, "learning_rate": 3e-06, "loss": -0.0047, "num_tokens": 19380783.0, "reward": 1.1858357191085815, "reward_std": 0.22427129745483398, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5011553764343262, "rewards/format_reward_step": 0.99609375, "step": 92 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.502749402417571e-06, "aux_brier/mean_group_std": 0.03999081135614383, "aux_brier/mean_r": 0.971616352156379, "aux_brier/n_active_tok": 255.25, "aux_brier/n_groups": 17.25, "aux_brier/n_step_records": 63.8125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5026763013515323, "calib/avg_num_step_conf": 8.421875, "calib/ece": 0.5088076923076923, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006101264552388616, "calib/mean_conf": 0.06868218623481782, "calib/mu_c": 0.06606382978723403, "calib/mu_w": 0.07216509433962265, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0033198380566801617, "calib/std_conf": 0.03558034197842046, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2469.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 480.2421875, "completions/mean_terminated_length": 487.8651123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0992, "grad_norm": 0.2826470732688904, "learning_rate": 2.9722222222222225e-06, "loss": 0.0328, "num_tokens": 19609501.0, "reward": 1.1513302326202393, "reward_std": 0.2515048086643219, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.48032090067863464, "rewards/format_reward_step": 0.9609375, "step": 93 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.0836285457570547e-06, "aux_brier/mean_group_std": 0.06554498580068994, "aux_brier/mean_r": 0.9523248224343105, "aux_brier/n_active_tok": 213.75, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 53.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5711429308565532, "calib/avg_num_step_conf": 6.91015625, "calib/ece": 0.4837656000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035662022703818394, "calib/mean_conf": 0.06359440000000001, "calib/mu_c": 0.06522058823529413, "calib/mu_w": 0.06165438596491229, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00168, "calib/std_conf": 0.034787815807262173, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2736.0, "completions/max_terminated_length": 2736.0, "completions/mean_length": 429.109375, "completions/mean_terminated_length": 432.4881896972656, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.10026666666666667, "grad_norm": 0.08196055144071579, "learning_rate": 2.944444444444445e-06, "loss": 0.0661, "num_tokens": 19828033.0, "reward": 1.1469007730484009, "reward_std": 0.2203456163406372, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5094780921936035, "rewards/format_reward_step": 0.9765625, "step": 94 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.3601831312134465e-07, "aux_brier/mean_group_std": 0.08406900227541236, "aux_brier/mean_r": 0.9480423782630827, "aux_brier/n_active_tok": 241.625, "aux_brier/n_groups": 15.0, "aux_brier/n_step_records": 60.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4671528136293237, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.5123715415019763, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00632872999483737, "calib/mean_conf": 0.07656126482213439, "calib/mu_c": 0.07395973154362419, "calib/mu_w": 0.08028846153846156, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03774829789086048, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2708.0, "completions/max_terminated_length": 2708.0, "completions/mean_length": 467.8359375, "completions/mean_terminated_length": 467.8359375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.10133333333333333, "grad_norm": 0.007402057759463787, "learning_rate": 2.916666666666667e-06, "loss": 0.0373, "num_tokens": 20053927.0, "reward": 1.1945419311523438, "reward_std": 0.2608509063720703, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.48129260540008545, "rewards/format_reward_step": 0.984375, "step": 95 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.0788587567220915e-06, "aux_brier/mean_group_std": 0.06961766976966079, "aux_brier/mean_r": 0.9448844609802067, "aux_brier/n_active_tok": 234.5, "aux_brier/n_groups": 13.9375, "aux_brier/n_step_records": 58.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49485869935308135, "calib/avg_num_step_conf": 7.3515625, "calib/ece": 0.5742952755905512, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001960027238678927, "calib/mean_conf": 0.07531102362204727, "calib/mu_c": 0.07462424242424243, "calib/mu_w": 0.07658426966292135, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.031891945341925375, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2218.0, "completions/max_terminated_length": 2218.0, "completions/mean_length": 426.11328125, "completions/mean_terminated_length": 426.11328125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.1024, "grad_norm": 0.00814022496342659, "learning_rate": 2.888888888888889e-06, "loss": 0.0198, "num_tokens": 20268828.0, "reward": 1.2499287128448486, "reward_std": 0.19599775969982147, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.4372149705886841, "rewards/format_reward_step": 0.9921875, "step": 96 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.0563297737096207e-06, "aux_brier/mean_group_std": 0.03971339490624335, "aux_brier/mean_r": 0.9759721514353451, "aux_brier/n_active_tok": 243.875, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 60.96875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.546875, "calib/avg_num_step_conf": 7.71875, "calib/ece": 0.4365217391304348, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00037874999999999714, "calib/mean_conf": 0.07596837944664031, "calib/mu_c": 0.07578125000000001, "calib/mu_w": 0.07616, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0032806324110671936, "calib/std_conf": 0.03612241741171675, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1976.0, "completions/max_terminated_length": 1976.0, "completions/mean_length": 429.80859375, "completions/mean_terminated_length": 429.80859375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.10346666666666667, "grad_norm": 0.00799585785716772, "learning_rate": 2.861111111111111e-06, "loss": 0.0563, "num_tokens": 20483931.0, "reward": 1.1334080696105957, "reward_std": 0.27943581342697144, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5570694208145142, "rewards/format_reward_step": 0.98828125, "step": 97 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.640557688371908e-07, "aux_brier/mean_group_std": 0.07610027405608073, "aux_brier/mean_r": 0.9383796943442817, "aux_brier/n_active_tok": 262.5, "aux_brier/n_groups": 19.84375, "aux_brier/n_step_records": 65.625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5169400736524942, "calib/avg_num_step_conf": 8.390625, "calib/ece": 0.5106209677419355, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002351657181118169, "calib/mean_conf": 0.07736290322580645, "calib/mu_c": 0.07638620689655173, "calib/mu_w": 0.0787378640776699, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0016532258064516129, "calib/std_conf": 0.03461955318225557, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2653.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 452.18359375, "completions/mean_terminated_length": 455.74407958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.10453333333333334, "grad_norm": 0.07025198638439178, "learning_rate": 2.8333333333333335e-06, "loss": 0.0327, "num_tokens": 20705874.0, "reward": 1.1751664876937866, "reward_std": 0.26449382305145264, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.48191598057746887, "rewards/format_reward_step": 0.96875, "step": 98 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9174293071078097e-06, "aux_brier/mean_group_std": 0.04591891670532119, "aux_brier/mean_r": 0.9734047437176357, "aux_brier/n_active_tok": 279.125, "aux_brier/n_groups": 17.09375, "aux_brier/n_step_records": 69.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4921597843053782, "calib/avg_num_step_conf": 9.296875, "calib/ece": 0.26998393574297186, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0026619838229033632, "calib/mean_conf": 0.08816867469879518, "calib/mu_c": 0.0864367816091954, "calib/mu_w": 0.08909876543209877, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004377510040160643, "calib/std_conf": 0.03797287743023869, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 532.2421875, "completions/mean_terminated_length": 540.6904907226562, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.1056, "grad_norm": 0.09254779666662216, "learning_rate": 2.805555555555556e-06, "loss": 0.0012, "num_tokens": 20947928.0, "reward": 0.9968215823173523, "reward_std": 0.2163114994764328, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6825988292694092, "rewards/format_reward_step": 0.97265625, "step": 99 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5276631067373536e-06, "aux_brier/mean_group_std": 0.08466932921242375, "aux_brier/mean_r": 0.9345134794677326, "aux_brier/n_active_tok": 258.5, "aux_brier/n_groups": 16.4375, "aux_brier/n_step_records": 64.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6570675641351282, "calib/avg_num_step_conf": 8.38671875, "calib/ece": 0.41934661354581676, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010640335280670557, "calib/mean_conf": 0.08639043824701197, "calib/mu_c": 0.09177419354838712, "calib/mu_w": 0.08113385826771656, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005856573705179283, "calib/std_conf": 0.05052094106667177, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2206.0, "completions/max_terminated_length": 2206.0, "completions/mean_length": 470.2265625, "completions/mean_terminated_length": 473.92913818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.10666666666666667, "grad_norm": 0.11505744606256485, "learning_rate": 2.7777777777777783e-06, "loss": 0.0374, "num_tokens": 21175714.0, "reward": 1.1184043884277344, "reward_std": 0.264805406332016, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.575179934501648, "rewards/format_reward_step": 0.98046875, "step": 100 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4317106415973058e-06, "aux_brier/mean_group_std": 0.038398240214649, "aux_brier/mean_r": 0.9726341007415349, "aux_brier/n_active_tok": 297.5, "aux_brier/n_groups": 19.84375, "aux_brier/n_step_records": 74.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5655408902691511, "calib/avg_num_step_conf": 9.45703125, "calib/ece": 0.3644402, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001179761904761878, "calib/mean_conf": 0.09707980000000001, "calib/mu_c": 0.09642857142857145, "calib/mu_w": 0.09760833333333332, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0067599999999999995, "calib/std_conf": 0.05164149728619417, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 546.71484375, "completions/mean_terminated_length": 546.71484375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.10773333333333333, "grad_norm": 0.006851323414593935, "learning_rate": 2.7500000000000004e-06, "loss": 0.0721, "num_tokens": 21422665.0, "reward": 1.078688621520996, "reward_std": 0.2098299115896225, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6116296052932739, "rewards/format_reward_step": 0.9765625, "step": 101 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.360309383029005e-07, "aux_brier/mean_group_std": 0.06816755253203652, "aux_brier/mean_r": 0.9489262297625767, "aux_brier/n_active_tok": 256.125, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 64.03125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5015853322304935, "calib/avg_num_step_conf": 8.0625, "calib/ece": 0.543542168674699, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0053684863523573295, "calib/mean_conf": 0.0854538152610442, "calib/mu_c": 0.08344871794871796, "calib/mu_w": 0.08881720430107529, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012449799196787147, "calib/std_conf": 0.041623562549827055, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 454.578125, "completions/mean_terminated_length": 458.157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.1088, "grad_norm": 0.06018517166376114, "learning_rate": 2.7222222222222224e-06, "loss": 0.0079, "num_tokens": 21645733.0, "reward": 1.2068250179290771, "reward_std": 0.20233562588691711, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.4523000717163086, "rewards/format_reward_step": 0.96875, "step": 102 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.562805981464749e-07, "aux_brier/mean_group_std": 0.05272825131698993, "aux_brier/mean_r": 0.9651729844405009, "aux_brier/n_active_tok": 277.625, "aux_brier/n_groups": 18.8125, "aux_brier/n_step_records": 69.40625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4689289432587194, "calib/avg_num_step_conf": 8.734375, "calib/ece": 0.45475502008032126, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01188001041124416, "calib/mean_conf": 0.09271485943775103, "calib/mu_c": 0.0873235294117647, "calib/mu_w": 0.09920353982300886, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0006425702811244991, "calib/std_conf": 0.04301726351036292, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 521.6015625, "completions/mean_terminated_length": 523.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.10986666666666667, "grad_norm": 0.031255025416612625, "learning_rate": 2.6944444444444444e-06, "loss": 0.0458, "num_tokens": 21883815.0, "reward": 1.1456598043441772, "reward_std": 0.1891973614692688, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5201395153999329, "rewards/format_reward_step": 0.96875, "step": 103 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.535850859718973e-07, "aux_brier/mean_group_std": 0.05220563907588298, "aux_brier/mean_r": 0.9593211452621763, "aux_brier/n_active_tok": 283.5, "aux_brier/n_groups": 18.25, "aux_brier/n_step_records": 70.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4958333333333333, "calib/avg_num_step_conf": 8.890625, "calib/ece": 0.34982598425196854, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003283959899749722, "calib/mean_conf": 0.1017488188976378, "calib/mu_c": 0.10192982456140354, "calib/mu_w": 0.10160142857142856, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0013779527559055118, "calib/std_conf": 0.05140161516832019, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2452.0, "completions/max_terminated_length": 2452.0, "completions/mean_length": 493.2578125, "completions/mean_terminated_length": 495.19219970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.11093333333333333, "grad_norm": 0.016969097778201103, "learning_rate": 2.666666666666667e-06, "loss": 0.0387, "num_tokens": 22116769.0, "reward": 1.0946837663650513, "reward_std": 0.2238212525844574, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6209226250648499, "rewards/format_reward_step": 0.98828125, "step": 104 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.042816317972324e-08, "aux_brier/mean_group_std": 0.03094594997743212, "aux_brier/mean_r": 0.9761414627030536, "aux_brier/n_active_tok": 271.25, "aux_brier/n_groups": 17.65625, "aux_brier/n_step_records": 67.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.38565625000000003, "calib/avg_num_step_conf": 8.73046875, "calib/ece": 0.40810276679841895, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.021159374999999994, "calib/mean_conf": 0.09790513833992096, "calib/mu_c": 0.08720000000000003, "calib/mu_w": 0.10835937500000002, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00596837944664032, "calib/std_conf": 0.04883584357688764, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2619.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 475.61328125, "completions/mean_terminated_length": 479.3582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.112, "grad_norm": 0.2394425868988037, "learning_rate": 2.6388888888888893e-06, "loss": 0.0307, "num_tokens": 22344286.0, "reward": 1.122889757156372, "reward_std": 0.2992156147956848, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5696839690208435, "rewards/format_reward_step": 0.984375, "step": 105 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.998493286558549e-07, "aux_brier/mean_group_std": 0.0665005169500878, "aux_brier/mean_r": 0.9518789157993527, "aux_brier/n_active_tok": 248.5, "aux_brier/n_groups": 12.71875, "aux_brier/n_step_records": 62.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4780040221216692, "calib/avg_num_step_conf": 7.8828125, "calib/ece": 0.3719525691699605, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004566804927099072, "calib/mean_conf": 0.09429249011857709, "calib/mu_c": 0.09183760683760682, "calib/mu_w": 0.09640441176470589, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0018972332015810274, "calib/std_conf": 0.03128628206703324, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2247.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 441.84375, "completions/mean_terminated_length": 443.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.11306666666666666, "grad_norm": 0.08020593971014023, "learning_rate": 2.6111111111111113e-06, "loss": 0.0212, "num_tokens": 22561982.0, "reward": 1.102532148361206, "reward_std": 0.18902963399887085, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6054410934448242, "rewards/format_reward_step": 0.98828125, "step": 106 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.942115454156017e-07, "aux_brier/mean_group_std": 0.047392673093088485, "aux_brier/mean_r": 0.96368972565201, "aux_brier/n_active_tok": 279.125, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 69.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6213818860877685, "calib/avg_num_step_conf": 8.85546875, "calib/ece": 0.5150179282868524, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010865246098439363, "calib/mean_conf": 0.09693426294820717, "calib/mu_c": 0.1011764705882353, "calib/mu_w": 0.09031122448979594, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001195219123505976, "calib/std_conf": 0.0380255295527035, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 474.578125, "completions/mean_terminated_length": 476.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.11413333333333334, "grad_norm": 0.06961288303136826, "learning_rate": 2.5833333333333337e-06, "loss": 0.0066, "num_tokens": 22788090.0, "reward": 1.2111705541610718, "reward_std": 0.21619771420955658, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.49311956763267517, "rewards/format_reward_step": 0.98046875, "step": 107 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.191761250384786e-07, "aux_brier/mean_group_std": 0.0756230759081317, "aux_brier/mean_r": 0.9530716027752302, "aux_brier/n_active_tok": 287.0, "aux_brier/n_groups": 17.25, "aux_brier/n_step_records": 71.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4621648460774578, "calib/avg_num_step_conf": 9.03515625, "calib/ece": 0.5259881889763779, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011758689175769665, "calib/mean_conf": 0.10558661417322836, "calib/mu_c": 0.10118867924528299, "calib/mu_w": 0.11294736842105266, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002795275590551181, "calib/std_conf": 0.04297122249310491, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2337.0, "completions/max_terminated_length": 2337.0, "completions/mean_length": 525.6796875, "completions/mean_terminated_length": 525.6796875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.1152, "grad_norm": 0.0063099246472120285, "learning_rate": 2.5555555555555557e-06, "loss": 0.0491, "num_tokens": 23025896.0, "reward": 1.238161325454712, "reward_std": 0.24237248301506042, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.48389554023742676, "rewards/format_reward_step": 0.9921875, "step": 108 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.6950543046956312e-06, "aux_brier/mean_group_std": 0.03761034301602371, "aux_brier/mean_r": 0.9748748432411104, "aux_brier/n_active_tok": 299.625, "aux_brier/n_groups": 18.53125, "aux_brier/n_step_records": 74.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49778858911985846, "calib/avg_num_step_conf": 9.6328125, "calib/ece": 0.3645994047619047, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.001593122512162773, "calib/mean_conf": 0.11008313492063493, "calib/mu_c": 0.11092394957983195, "calib/mu_w": 0.10933082706766918, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00123015873015873, "calib/std_conf": 0.07197954159363211, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 516.72265625, "completions/mean_terminated_length": 520.7913208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.11626666666666667, "grad_norm": 0.050698086619377136, "learning_rate": 2.5277777777777778e-06, "loss": -0.0133, "num_tokens": 23262777.0, "reward": 1.1084380149841309, "reward_std": 0.16250526905059814, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6056268215179443, "rewards/format_reward_step": 0.984375, "step": 109 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.758663766895154e-07, "aux_brier/mean_group_std": 0.051237897498278444, "aux_brier/mean_r": 0.9642251720551518, "aux_brier/n_active_tok": 274.375, "aux_brier/n_groups": 17.46875, "aux_brier/n_step_records": 68.59375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5400616332819723, "calib/avg_num_step_conf": 8.765625, "calib/ece": 0.3716, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020005136106831106, "calib/mean_conf": 0.10352000000000001, "calib/mu_c": 0.10457627118644068, "calib/mu_w": 0.10257575757575757, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015600000000000002, "calib/std_conf": 0.0407775624578027, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 494.12109375, "completions/mean_terminated_length": 499.9802551269531, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.11733333333333333, "grad_norm": 0.07684852182865143, "learning_rate": 2.5e-06, "loss": 0.0364, "num_tokens": 23494192.0, "reward": 1.0992043018341064, "reward_std": 0.2502002418041229, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5999422073364258, "rewards/format_reward_step": 0.9765625, "step": 110 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.9773352827749955e-06, "aux_brier/mean_group_std": 0.06141974342018306, "aux_brier/mean_r": 0.9544979093823415, "aux_brier/n_active_tok": 281.5, "aux_brier/n_groups": 20.34375, "aux_brier/n_step_records": 70.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5029966563623746, "calib/avg_num_step_conf": 9.0234375, "calib/ece": 0.4303392857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002291937417197687, "calib/mean_conf": 0.09688293650793653, "calib/mu_c": 0.0957824427480916, "calib/mu_w": 0.09807438016528928, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00369047619047619, "calib/std_conf": 0.04682154556902366, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2640.0, "completions/max_terminated_length": 2640.0, "completions/mean_length": 509.0078125, "completions/mean_terminated_length": 509.0078125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1184, "grad_norm": 0.0074748811312019825, "learning_rate": 2.4722222222222226e-06, "loss": 0.0328, "num_tokens": 23731906.0, "reward": 1.1437276601791382, "reward_std": 0.29110652208328247, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5592859387397766, "rewards/format_reward_step": 0.984375, "step": 111 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.231238049778649e-07, "aux_brier/mean_group_std": 0.09243357086112104, "aux_brier/mean_r": 0.929762165426795, "aux_brier/n_active_tok": 275.375, "aux_brier/n_groups": 17.8125, "aux_brier/n_step_records": 68.84375, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4715924426450742, "calib/avg_num_step_conf": 9.3046875, "calib/ece": 0.43565922131147533, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010405769230769232, "calib/mean_conf": 0.10393094262295083, "calib/mu_c": 0.09906923076923077, "calib/mu_w": 0.109475, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003401639344262295, "calib/std_conf": 0.05059397840811337, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 580.8515625, "completions/mean_terminated_length": 590.0714721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.11946666666666667, "grad_norm": 0.1376224309206009, "learning_rate": 2.4444444444444447e-06, "loss": 0.0421, "num_tokens": 23988524.0, "reward": 1.1155526638031006, "reward_std": 0.2667517066001892, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5325231552124023, "rewards/format_reward_step": 0.94921875, "step": 112 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5850854699073214e-06, "aux_brier/mean_group_std": 0.06646439129154987, "aux_brier/mean_r": 0.9604305997364578, "aux_brier/n_active_tok": 277.25, "aux_brier/n_groups": 17.53125, "aux_brier/n_step_records": 69.3125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46367900608519264, "calib/avg_num_step_conf": 8.70703125, "calib/ece": 0.4356746031746031, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004006085192697767, "calib/mean_conf": 0.10400793650793652, "calib/mu_c": 0.10382352941176472, "calib/mu_w": 0.1042241379310345, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.04431363615988034, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 473.4140625, "completions/mean_terminated_length": 475.2706298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.12053333333333334, "grad_norm": 0.07470325380563736, "learning_rate": 2.4166666666666667e-06, "loss": 0.0322, "num_tokens": 24214918.0, "reward": 1.1531175374984741, "reward_std": 0.2532326877117157, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.542157769203186, "rewards/format_reward_step": 0.97265625, "step": 113 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.883232610583768e-07, "aux_brier/mean_group_std": 0.044180647779355924, "aux_brier/mean_r": 0.9690856573495789, "aux_brier/n_active_tok": 280.125, "aux_brier/n_groups": 17.75, "aux_brier/n_step_records": 70.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.42087765957446804, "calib/avg_num_step_conf": 8.94140625, "calib/ece": 0.5338976377952758, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.017955984042553177, "calib/mean_conf": 0.10397637795275592, "calib/mu_c": 0.09733125000000001, "calib/mu_w": 0.11528723404255319, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003976377952755905, "calib/std_conf": 0.045206344855863216, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2306.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 494.703125, "completions/mean_terminated_length": 496.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.1216, "grad_norm": 0.04765012115240097, "learning_rate": 2.388888888888889e-06, "loss": 0.0317, "num_tokens": 24446586.0, "reward": 1.2371981143951416, "reward_std": 0.2438342273235321, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.47223010659217834, "rewards/format_reward_step": 0.98828125, "step": 114 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.733868374299902e-06, "aux_brier/mean_group_std": 0.04485596011072332, "aux_brier/mean_r": 0.9686095238860526, "aux_brier/n_active_tok": 280.5, "aux_brier/n_groups": 16.59375, "aux_brier/n_step_records": 70.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48344494047619047, "calib/avg_num_step_conf": 8.80859375, "calib/ece": 0.3974842519685039, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004455233134920647, "calib/mean_conf": 0.10661023622047244, "calib/mu_c": 0.10436507936507937, "calib/mu_w": 0.10882031250000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004015748031496063, "calib/std_conf": 0.040382541512382776, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2237.0, "completions/max_terminated_length": 2237.0, "completions/mean_length": 465.6015625, "completions/mean_terminated_length": 465.6015625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.12266666666666666, "grad_norm": 0.007443990558385849, "learning_rate": 2.361111111111111e-06, "loss": 0.0577, "num_tokens": 24671044.0, "reward": 1.139647364616394, "reward_std": 0.25040751695632935, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5898394584655762, "rewards/format_reward_step": 0.9921875, "step": 115 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.3664424699522115e-06, "aux_brier/mean_group_std": 0.028783907080098276, "aux_brier/mean_r": 0.9728226268136849, "aux_brier/n_active_tok": 326.625, "aux_brier/n_groups": 22.625, "aux_brier/n_step_records": 81.65625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4113945578231293, "calib/avg_num_step_conf": 10.23828125, "calib/ece": 0.45416326530612244, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016285714285714306, "calib/mean_conf": 0.117265306122449, "calib/mu_c": 0.11028571428571428, "calib/mu_w": 0.12657142857142858, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.051272292517580385, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 606.53125, "completions/mean_terminated_length": 608.9098510742188, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.12373333333333333, "grad_norm": 0.10887009650468826, "learning_rate": 2.3333333333333336e-06, "loss": 0.1208, "num_tokens": 24930836.0, "reward": 1.1513084173202515, "reward_std": 0.25570201873779297, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5114836096763611, "rewards/format_reward_step": 0.953125, "step": 116 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.524404008052983e-06, "aux_brier/mean_group_std": 0.04356276080217747, "aux_brier/mean_r": 0.9644402749269458, "aux_brier/n_active_tok": 315.125, "aux_brier/n_groups": 21.0, "aux_brier/n_step_records": 78.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.42810639880952384, "calib/avg_num_step_conf": 10.00390625, "calib/ece": 0.39664724409448826, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01872901785714287, "calib/mean_conf": 0.11658110236220473, "calib/mu_c": 0.10714285714285715, "calib/mu_w": 0.12587187500000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008582677165354331, "calib/std_conf": 0.05578804056113586, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 519.5546875, "completions/mean_terminated_length": 519.5546875, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.1248, "grad_norm": 0.006424721796065569, "learning_rate": 2.305555555555556e-06, "loss": 0.056, "num_tokens": 25170442.0, "reward": 1.135505199432373, "reward_std": 0.18943741917610168, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5888957977294922, "rewards/format_reward_step": 0.9921875, "step": 117 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.084950010560753e-07, "aux_brier/mean_group_std": 0.040890445550818694, "aux_brier/mean_r": 0.9700902830864013, "aux_brier/n_active_tok": 294.125, "aux_brier/n_groups": 17.1875, "aux_brier/n_step_records": 73.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.3619748029541364, "calib/avg_num_step_conf": 9.3828125, "calib/ece": 0.4123622047244095, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02869236020604482, "calib/mean_conf": 0.11503937007874017, "calib/mu_c": 0.10114503816793893, "calib/mu_w": 0.12983739837398375, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005826771653543307, "calib/std_conf": 0.05699667093121313, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 515.7109375, "completions/mean_terminated_length": 517.7333374023438, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12586666666666665, "grad_norm": 0.11549760401248932, "learning_rate": 2.277777777777778e-06, "loss": 0.0371, "num_tokens": 25406472.0, "reward": 1.1468514204025269, "reward_std": 0.23238511383533478, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5639684200286865, "rewards/format_reward_step": 0.98828125, "step": 118 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.66289899305217e-07, "aux_brier/mean_group_std": 0.050577590547167976, "aux_brier/mean_r": 0.96530315658855, "aux_brier/n_active_tok": 321.25, "aux_brier/n_groups": 22.875, "aux_brier/n_step_records": 80.3125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.485006518904824, "calib/avg_num_step_conf": 10.2265625, "calib/ece": 0.4096370967741936, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008767926988265964, "calib/mean_conf": 0.11455645161290325, "calib/mu_c": 0.11038461538461539, "calib/mu_w": 0.11915254237288135, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04934045687043385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2716.0, "completions/max_terminated_length": 2716.0, "completions/mean_length": 592.30078125, "completions/mean_terminated_length": 596.9645385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.12693333333333334, "grad_norm": 0.09584935754537582, "learning_rate": 2.25e-06, "loss": 0.0962, "num_tokens": 25663165.0, "reward": 1.1316813230514526, "reward_std": 0.21808713674545288, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5579754114151001, "rewards/format_reward_step": 0.96875, "step": 119 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.0583767721117354e-06, "aux_brier/mean_group_std": 0.05486355727121817, "aux_brier/mean_r": 0.9627016716133301, "aux_brier/n_active_tok": 297.0, "aux_brier/n_groups": 16.5625, "aux_brier/n_step_records": 74.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.37634133878385284, "calib/avg_num_step_conf": 9.3984375, "calib/ece": 0.48863137254901956, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016694430250383252, "calib/mean_conf": 0.11042745098039218, "calib/mu_c": 0.10368421052631578, "calib/mu_w": 0.12037864077669903, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0014901960784313726, "calib/std_conf": 0.04268288943318205, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1873.0, "completions/max_terminated_length": 1873.0, "completions/mean_length": 498.953125, "completions/mean_terminated_length": 500.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.128, "grad_norm": 0.12316922843456268, "learning_rate": 2.222222222222222e-06, "loss": 0.0183, "num_tokens": 25897585.0, "reward": 1.2196736335754395, "reward_std": 0.21989823877811432, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5115074515342712, "rewards/format_reward_step": 0.99609375, "step": 120 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.767846133648998e-07, "aux_brier/mean_group_std": 0.051207335816678724, "aux_brier/mean_r": 0.964539341683809, "aux_brier/n_active_tok": 303.875, "aux_brier/n_groups": 18.28125, "aux_brier/n_step_records": 75.96875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49880413703943116, "calib/avg_num_step_conf": 9.5234375, "calib/ece": 0.40971887550200803, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": -0.01667420814479642, "calib/mean_conf": 0.12835341365461847, "calib/mu_c": 0.12038461538461537, "calib/mu_w": 0.1370588235294118, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007991967871485943, "calib/std_conf": 0.09070949480724892, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2857.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 577.28125, "completions/mean_terminated_length": 579.5451049804688, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.12906666666666666, "grad_norm": 0.020458100363612175, "learning_rate": 2.1944444444444445e-06, "loss": 0.0404, "num_tokens": 26150425.0, "reward": 1.134911060333252, "reward_std": 0.3060748875141144, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5630819797515869, "rewards/format_reward_step": 0.97265625, "step": 121 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.248979595418035e-08, "aux_brier/mean_group_std": 0.048168414011383685, "aux_brier/mean_r": 0.9656072429914944, "aux_brier/n_active_tok": 308.875, "aux_brier/n_groups": 19.90625, "aux_brier/n_step_records": 77.21875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5066433339814634, "calib/avg_num_step_conf": 9.84375, "calib/ece": 0.4457839999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.0026240197031564133, "calib/mean_conf": 0.117496, "calib/mu_c": 0.11633093525179855, "calib/mu_w": 0.11895495495495496, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00364, "calib/std_conf": 0.06606870654099413, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2739.0, "completions/max_terminated_length": 2739.0, "completions/mean_length": 525.6875, "completions/mean_terminated_length": 529.8267822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.13013333333333332, "grad_norm": 0.09221775084733963, "learning_rate": 2.166666666666667e-06, "loss": 0.0117, "num_tokens": 26392345.0, "reward": 1.1667943000793457, "reward_std": 0.24005213379859924, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5421773791313171, "rewards/format_reward_step": 0.9765625, "step": 122 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.577460198806342e-07, "aux_brier/mean_group_std": 0.05941136835504828, "aux_brier/mean_r": 0.9547793621450941, "aux_brier/n_active_tok": 335.25, "aux_brier/n_groups": 23.0625, "aux_brier/n_step_records": 83.8125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.36483739837398377, "calib/avg_num_step_conf": 10.4765625, "calib/ece": 0.3868825910931174, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.026819433516915805, "calib/mean_conf": 0.12785425101214576, "calib/mu_c": 0.11439024390243903, "calib/mu_w": 0.14120967741935483, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008380566801619435, "calib/std_conf": 0.05536638192180416, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 613.9765625, "completions/mean_terminated_length": 613.9765625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.1312, "grad_norm": 0.006852935999631882, "learning_rate": 2.138888888888889e-06, "loss": 0.0536, "num_tokens": 26654811.0, "reward": 1.1145949363708496, "reward_std": 0.286065936088562, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5755671858787537, "rewards/format_reward_step": 0.96484375, "step": 123 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.67160504219766e-07, "aux_brier/mean_group_std": 0.048935497574640725, "aux_brier/mean_r": 0.9620801441547425, "aux_brier/n_active_tok": 306.375, "aux_brier/n_groups": 18.75, "aux_brier/n_step_records": 76.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5272200772200772, "calib/avg_num_step_conf": 9.8515625, "calib/ece": 0.45996047430830045, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0033712998712998737, "calib/mean_conf": 0.12501976284584979, "calib/mu_c": 0.12641891891891893, "calib/mu_w": 0.12304761904761906, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.052370785884622795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2241.0, "completions/max_terminated_length": 2241.0, "completions/mean_length": 543.3828125, "completions/mean_terminated_length": 545.5137329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.13226666666666667, "grad_norm": 0.08716801553964615, "learning_rate": 2.1111111111111114e-06, "loss": 0.0124, "num_tokens": 26900733.0, "reward": 1.2038832902908325, "reward_std": 0.2243453860282898, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5342831611633301, "rewards/format_reward_step": 0.984375, "step": 124 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8530113138903737e-06, "aux_brier/mean_group_std": 0.0586785178479902, "aux_brier/mean_r": 0.9477386938887327, "aux_brier/n_active_tok": 328.75, "aux_brier/n_groups": 24.59375, "aux_brier/n_step_records": 82.1875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.37030423280423286, "calib/avg_num_step_conf": 10.3125, "calib/ece": 0.38705691056910574, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.018782142857142872, "calib/mean_conf": 0.12513821138211384, "calib/mu_c": 0.1159761904761905, "calib/mu_w": 0.13475833333333337, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05033268591678368, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 590.55859375, "completions/mean_terminated_length": 595.2086791992188, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.13333333333333333, "grad_norm": 0.020117120817303658, "learning_rate": 2.0833333333333334e-06, "loss": 0.0643, "num_tokens": 27156724.0, "reward": 1.1140141487121582, "reward_std": 0.2691173851490021, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5654317736625671, "rewards/format_reward_step": 0.9609375, "step": 125 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.6155431531726947e-07, "aux_brier/mean_group_std": 0.07693358174807846, "aux_brier/mean_r": 0.9415905289825439, "aux_brier/n_active_tok": 310.625, "aux_brier/n_groups": 18.75, "aux_brier/n_step_records": 77.65625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5189185650693899, "calib/avg_num_step_conf": 10.32421875, "calib/ece": 0.4160135887096774, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018803757528148696, "calib/mean_conf": 0.12430899193548389, "calib/mu_c": 0.1251733582089552, "calib/mu_w": 0.12329298245614033, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.041028563754314754, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2601.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 529.34375, "completions/mean_terminated_length": 539.8884887695312, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.1344, "grad_norm": 0.1353030502796173, "learning_rate": 2.0555555555555555e-06, "loss": -0.0152, "num_tokens": 27397700.0, "reward": 1.147750735282898, "reward_std": 0.24750962853431702, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5597528219223022, "rewards/format_reward_step": 0.96875, "step": 126 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.346220760451878e-07, "aux_brier/mean_group_std": 0.04223396530529107, "aux_brier/mean_r": 0.9626316450811843, "aux_brier/n_active_tok": 347.5, "aux_brier/n_groups": 22.5, "aux_brier/n_step_records": 86.875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5099878934624698, "calib/avg_num_step_conf": 11.2578125, "calib/ece": 0.3930983606557377, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0027078288942695705, "calib/mean_conf": 0.131, "calib/mu_c": 0.1296904761904762, "calib/mu_w": 0.13239830508474576, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003852459016393442, "calib/std_conf": 0.048811783043998976, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 589.78515625, "completions/mean_terminated_length": 594.4291381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.13546666666666668, "grad_norm": 0.15027998387813568, "learning_rate": 2.027777777777778e-06, "loss": 0.0319, "num_tokens": 27652357.0, "reward": 1.111243486404419, "reward_std": 0.23273774981498718, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5699740648269653, "rewards/format_reward_step": 0.953125, "step": 127 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.814065146078853e-07, "aux_brier/mean_group_std": 0.05058813252810048, "aux_brier/mean_r": 0.9581730020925724, "aux_brier/n_active_tok": 336.625, "aux_brier/n_groups": 24.875, "aux_brier/n_step_records": 84.15625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5134017094017094, "calib/avg_num_step_conf": 11.26171875, "calib/ece": 0.3956528925619835, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002644786324786319, "calib/mean_conf": 0.1309586776859504, "calib/mu_c": 0.12968, "calib/mu_w": 0.1323247863247863, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005041322314049587, "calib/std_conf": 0.05739494847674471, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 565.67578125, "completions/mean_terminated_length": 579.2520141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.13653333333333334, "grad_norm": 0.07882338017225266, "learning_rate": 2.0000000000000003e-06, "loss": 0.0493, "num_tokens": 27903834.0, "reward": 1.1020238399505615, "reward_std": 0.2673460841178894, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5643455982208252, "rewards/format_reward_step": 0.9453125, "step": 128 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.0833478269745012e-06, "aux_brier/mean_group_std": 0.06112429228881233, "aux_brier/mean_r": 0.955118267986609, "aux_brier/n_active_tok": 327.625, "aux_brier/n_groups": 22.46875, "aux_brier/n_step_records": 81.90625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5082930432446952, "calib/avg_num_step_conf": 10.55078125, "calib/ece": 0.4648266129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007679156594144476, "calib/mean_conf": 0.1345282258064516, "calib/mu_c": 0.13136986301369863, "calib/mu_w": 0.1390490196078431, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00532258064516129, "calib/std_conf": 0.04688832831671554, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 535.19921875, "completions/mean_terminated_length": 539.4133911132812, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.1376, "grad_norm": 0.042812030762434006, "learning_rate": 1.9722222222222224e-06, "loss": 0.0608, "num_tokens": 28143229.0, "reward": 1.1868423223495483, "reward_std": 0.23943841457366943, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5286191701889038, "rewards/format_reward_step": 0.96875, "step": 129 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.331072412741779e-07, "aux_brier/mean_group_std": 0.05442685065988097, "aux_brier/mean_r": 0.9539009489837013, "aux_brier/n_active_tok": 342.0, "aux_brier/n_groups": 25.9375, "aux_brier/n_step_records": 85.5, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5260685270222537, "calib/avg_num_step_conf": 11.37109375, "calib/ece": 0.4773606557377049, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00567177675732955, "calib/mean_conf": 0.13329508196721312, "calib/mu_c": 0.135503355704698, "calib/mu_w": 0.12983157894736844, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04546331407445597, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 567.5234375, "completions/mean_terminated_length": 571.9921264648438, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.13866666666666666, "grad_norm": 0.1459866762161255, "learning_rate": 1.944444444444445e-06, "loss": 0.0811, "num_tokens": 28393803.0, "reward": 1.1860744953155518, "reward_std": 0.21869081258773804, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5099233984947205, "rewards/format_reward_step": 0.953125, "step": 130 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.550119352437655e-07, "aux_brier/mean_group_std": 0.04851165169904652, "aux_brier/mean_r": 0.9608547248981462, "aux_brier/n_active_tok": 341.375, "aux_brier/n_groups": 22.53125, "aux_brier/n_step_records": 85.34375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5204288025889968, "calib/avg_num_step_conf": 10.9375, "calib/ece": 0.2827530364372469, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011623516720604132, "calib/mean_conf": 0.13757085020242918, "calib/mu_c": 0.13689320388349513, "calib/mu_w": 0.13805555555555554, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0016599190283400809, "calib/std_conf": 0.05183837034637416, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2570.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 557.3046875, "completions/mean_terminated_length": 561.6929321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.13973333333333332, "grad_norm": 0.057814519852399826, "learning_rate": 1.916666666666667e-06, "loss": 0.0819, "num_tokens": 28642681.0, "reward": 1.0447964668273926, "reward_std": 0.2140725702047348, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6479359269142151, "rewards/format_reward_step": 0.9609375, "step": 131 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.144993835999202e-07, "aux_brier/mean_group_std": 0.07577764684231701, "aux_brier/mean_r": 0.9313513824151439, "aux_brier/n_active_tok": 347.875, "aux_brier/n_groups": 25.65625, "aux_brier/n_step_records": 86.96875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4040906017579445, "calib/avg_num_step_conf": 11.55859375, "calib/ece": 0.448553036437247, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.017928735632183945, "calib/mean_conf": 0.1493417004048583, "calib/mu_c": 0.14193793103448277, "calib/mu_w": 0.1598666666666667, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005425101214574899, "calib/std_conf": 0.07819505665039202, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2878.0, "completions/max_terminated_length": 2878.0, "completions/mean_length": 595.609375, "completions/mean_terminated_length": 600.2991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.1408, "grad_norm": 0.07412625104188919, "learning_rate": 1.888888888888889e-06, "loss": 0.0305, "num_tokens": 28900749.0, "reward": 1.1827707290649414, "reward_std": 0.2722616195678711, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5279582142829895, "rewards/format_reward_step": 0.9609375, "step": 132 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.536213528885444e-07, "aux_brier/mean_group_std": 0.045708571092240434, "aux_brier/mean_r": 0.9610903137371872, "aux_brier/n_active_tok": 398.125, "aux_brier/n_groups": 25.625, "aux_brier/n_step_records": 99.53125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.49353647276084944, "calib/avg_num_step_conf": 12.87109375, "calib/ece": 0.302834008097166, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0025939849624060096, "calib/mean_conf": 0.16218623481781377, "calib/mu_c": 0.16078947368421054, "calib/mu_w": 0.16338345864661655, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0017408906882591094, "calib/std_conf": 0.06694064786686449, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2873.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 680.58984375, "completions/mean_terminated_length": 688.6600952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.14186666666666667, "grad_norm": 0.07950592041015625, "learning_rate": 1.8611111111111113e-06, "loss": 0.0824, "num_tokens": 29181324.0, "reward": 1.0859920978546143, "reward_std": 0.3186224699020386, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6330312490463257, "rewards/format_reward_step": 0.96484375, "step": 133 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.510764198388365e-07, "aux_brier/mean_group_std": 0.053893961703775195, "aux_brier/mean_r": 0.9582566091227595, "aux_brier/n_active_tok": 360.375, "aux_brier/n_groups": 22.53125, "aux_brier/n_step_records": 90.09375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6057312252964427, "calib/avg_num_step_conf": 11.46484375, "calib/ece": 0.4122550607287449, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.003852173913043516, "calib/mean_conf": 0.15179352226720652, "calib/mu_c": 0.15, "calib/mu_w": 0.1538521739130435, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.014817813765182186, "calib/std_conf": 0.08473283733725175, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2222.0, "completions/max_terminated_length": 2222.0, "completions/mean_length": 640.37109375, "completions/mean_terminated_length": 642.8823852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.14293333333333333, "grad_norm": 0.023576512932777405, "learning_rate": 1.8333333333333333e-06, "loss": 0.0249, "num_tokens": 29454211.0, "reward": 1.1368221044540405, "reward_std": 0.2845694422721863, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5707259178161621, "rewards/format_reward_step": 0.95703125, "step": 134 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.277911868135533e-07, "aux_brier/mean_group_std": 0.032645955365345085, "aux_brier/mean_r": 0.9686876131335355, "aux_brier/n_active_tok": 366.875, "aux_brier/n_groups": 25.65625, "aux_brier/n_step_records": 91.71875, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5428687977453089, "calib/avg_num_step_conf": 12.15234375, "calib/ece": 0.4467754237288135, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009821923904175661, "calib/mean_conf": 0.1422076271186441, "calib/mu_c": 0.1462446043165468, "calib/mu_w": 0.13642268041237113, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06078193958006931, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3015.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 653.34375, "completions/mean_terminated_length": 671.7108154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.144, "grad_norm": 0.10819050669670105, "learning_rate": 1.8055555555555557e-06, "loss": 0.0698, "num_tokens": 29727347.0, "reward": 1.1328237056732178, "reward_std": 0.3328965902328491, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5156698226928711, "rewards/format_reward_step": 0.921875, "step": 135 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.993378980415301e-08, "aux_brier/mean_group_std": 0.04996433901579282, "aux_brier/mean_r": 0.9631342331370977, "aux_brier/n_active_tok": 352.875, "aux_brier/n_groups": 21.96875, "aux_brier/n_step_records": 88.21875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5575663336200621, "calib/avg_num_step_conf": 11.41015625, "calib/ece": 0.34097967479674796, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005946999272149839, "calib/mean_conf": 0.14796341463414633, "calib/mu_c": 0.15103361344537816, "calib/mu_w": 0.14508661417322832, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0026016260162601626, "calib/std_conf": 0.05371312956370412, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2873.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 599.9453125, "completions/mean_terminated_length": 602.298095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.14506666666666668, "grad_norm": 0.030207408592104912, "learning_rate": 1.777777777777778e-06, "loss": 0.0836, "num_tokens": 29989421.0, "reward": 1.0984869003295898, "reward_std": 0.26870197057724, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.612697422504425, "rewards/format_reward_step": 0.9609375, "step": 136 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.451327966283671e-08, "aux_brier/mean_group_std": 0.0484212163788584, "aux_brier/mean_r": 0.9503683049922916, "aux_brier/n_active_tok": 369.25, "aux_brier/n_groups": 26.65625, "aux_brier/n_step_records": 92.3125, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5387426900584795, "calib/avg_num_step_conf": 12.57421875, "calib/ece": 0.36955874999999994, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007091102756892259, "calib/mean_conf": 0.15544125, "calib/mu_c": 0.15880952380952382, "calib/mu_w": 0.15171842105263156, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05854874043709935, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.47265625, "completions/mean_terminated_length": 625.6932373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.14613333333333334, "grad_norm": 0.3105625510215759, "learning_rate": 1.75e-06, "loss": 0.0671, "num_tokens": 30253454.0, "reward": 1.1048812866210938, "reward_std": 0.25779303908348083, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5757750868797302, "rewards/format_reward_step": 0.9375, "step": 137 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.694103077351741e-07, "aux_brier/mean_group_std": 0.03401642074858287, "aux_brier/mean_r": 0.9591893936447753, "aux_brier/n_active_tok": 393.875, "aux_brier/n_groups": 30.78125, "aux_brier/n_step_records": 98.46875, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.49203821656050956, "calib/avg_num_step_conf": 12.51171875, "calib/ece": 0.48958024691358026, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002535550288846139, "calib/mean_conf": 0.15651028806584363, "calib/mu_c": 0.1574076433121019, "calib/mu_w": 0.15487209302325577, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.061549696347692494, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 626.64453125, "completions/mean_terminated_length": 629.1019897460938, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.1472, "grad_norm": 0.016836857423186302, "learning_rate": 1.7222222222222224e-06, "loss": 0.1075, "num_tokens": 30518211.0, "reward": 1.210523009300232, "reward_std": 0.26902103424072266, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.4983419179916382, "rewards/format_reward_step": 0.9453125, "step": 138 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.899891807383597e-08, "aux_brier/mean_group_std": 0.05492316790100756, "aux_brier/mean_r": 0.9489450025348883, "aux_brier/n_active_tok": 348.75, "aux_brier/n_groups": 22.53125, "aux_brier/n_step_records": 87.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5227302459445317, "calib/avg_num_step_conf": 11.296875, "calib/ece": 0.43578486055776894, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005493328100471012, "calib/mean_conf": 0.15672509960159364, "calib/mu_c": 0.15444897959183668, "calib/mu_w": 0.1599423076923077, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0034262948207171313, "calib/std_conf": 0.060926447455042644, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 574.36328125, "completions/mean_terminated_length": 576.61572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.14826666666666666, "grad_norm": 0.018420971930027008, "learning_rate": 1.6944444444444446e-06, "loss": 0.0332, "num_tokens": 30768344.0, "reward": 1.2034287452697754, "reward_std": 0.2504957318305969, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5559024810791016, "rewards/format_reward_step": 0.98046875, "step": 139 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.8163406878833896e-07, "aux_brier/mean_group_std": 0.03262215501609706, "aux_brier/mean_r": 0.965656692646287, "aux_brier/n_active_tok": 324.625, "aux_brier/n_groups": 20.34375, "aux_brier/n_step_records": 81.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5280590010056989, "calib/avg_num_step_conf": 10.8359375, "calib/ece": 0.46169642857142856, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.001984210526315866, "calib/mean_conf": 0.16924801587301588, "calib/mu_c": 0.16849999999999998, "calib/mu_w": 0.17048421052631585, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0039642857142857145, "calib/std_conf": 0.08187443805715158, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 573.1875, "completions/mean_terminated_length": 577.7008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.14933333333333335, "grad_norm": 0.1649053990840912, "learning_rate": 1.6666666666666667e-06, "loss": -0.0182, "num_tokens": 31020096.0, "reward": 1.2412121295928955, "reward_std": 0.2222604751586914, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5429735779762268, "rewards/format_reward_step": 0.984375, "step": 140 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.444762182667425e-07, "aux_brier/mean_group_std": 0.06021194060762077, "aux_brier/mean_r": 0.9393043191043328, "aux_brier/n_active_tok": 321.875, "aux_brier/n_groups": 21.25, "aux_brier/n_step_records": 80.46875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.543185584092792, "calib/avg_num_step_conf": 10.59765625, "calib/ece": 0.414133606557377, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005155288594310997, "calib/mean_conf": 0.17201393442622953, "calib/mu_c": 0.17416901408450705, "calib/mu_w": 0.16901372549019605, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0020901639344262295, "calib/std_conf": 0.06446232641739856, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2651.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 628.8125, "completions/mean_terminated_length": 636.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.1504, "grad_norm": 0.1896704137325287, "learning_rate": 1.638888888888889e-06, "loss": 0.048, "num_tokens": 31288168.0, "reward": 1.1682220697402954, "reward_std": 0.2870158553123474, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5557004809379578, "rewards/format_reward_step": 0.94921875, "step": 141 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.39145309413502e-07, "aux_brier/mean_group_std": 0.03928649252262543, "aux_brier/mean_r": 0.9603253421566224, "aux_brier/n_active_tok": 374.625, "aux_brier/n_groups": 24.3125, "aux_brier/n_step_records": 93.65625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5714238845144357, "calib/avg_num_step_conf": 11.83984375, "calib/ece": 0.3322186234817814, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015594225721784788, "calib/mean_conf": 0.18195141700404854, "calib/mu_c": 0.18952755905511814, "calib/mu_w": 0.17393333333333336, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.058917062993306804, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 627.08984375, "completions/mean_terminated_length": 629.549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.15146666666666667, "grad_norm": 0.12213481962680817, "learning_rate": 1.6111111111111113e-06, "loss": 0.0969, "num_tokens": 31553863.0, "reward": 1.1338919401168823, "reward_std": 0.23693037033081055, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6215052604675293, "rewards/format_reward_step": 0.96484375, "step": 142 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.411192180917787e-07, "aux_brier/mean_group_std": 0.045889375441185175, "aux_brier/mean_r": 0.9586582023419146, "aux_brier/n_active_tok": 363.0, "aux_brier/n_groups": 24.0, "aux_brier/n_step_records": 90.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5159789644012945, "calib/avg_num_step_conf": 11.6796875, "calib/ece": 0.4137368421052631, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008189050701186557, "calib/mean_conf": 0.1801093117408907, "calib/mu_c": 0.17669444444444446, "calib/mu_w": 0.18488349514563102, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005425101214574899, "calib/std_conf": 0.06294300643471418, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2669.0, "completions/max_terminated_length": 2669.0, "completions/mean_length": 626.84765625, "completions/mean_terminated_length": 634.2806396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.15253333333333333, "grad_norm": 0.13386240601539612, "learning_rate": 1.5833333333333333e-06, "loss": -0.0116, "num_tokens": 31821672.0, "reward": 1.1864228248596191, "reward_std": 0.20693063735961914, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5660035610198975, "rewards/format_reward_step": 0.96484375, "step": 143 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.054484572495575e-07, "aux_brier/mean_group_std": 0.021728829468979274, "aux_brier/mean_r": 0.9707798113110372, "aux_brier/n_active_tok": 339.25, "aux_brier/n_groups": 21.0625, "aux_brier/n_step_records": 84.8125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4765873015873015, "calib/avg_num_step_conf": 10.64453125, "calib/ece": 0.47948995983935744, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007267099567099589, "calib/mean_conf": 0.1831606425702811, "calib/mu_c": 0.1807090909090909, "calib/mu_w": 0.1879761904761905, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0568009833013899, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2647.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 613.65625, "completions/mean_terminated_length": 616.0628051757812, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.1536, "grad_norm": 0.11186142265796661, "learning_rate": 1.5555555555555558e-06, "loss": 0.0529, "num_tokens": 32082896.0, "reward": 1.262184739112854, "reward_std": 0.238124281167984, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.5253016948699951, "rewards/format_reward_step": 0.97265625, "step": 144 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.698954099627059e-08, "aux_brier/mean_group_std": 0.06225892794985184, "aux_brier/mean_r": 0.9345236053453235, "aux_brier/n_active_tok": 361.125, "aux_brier/n_groups": 22.15625, "aux_brier/n_step_records": 90.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47831623099393306, "calib/avg_num_step_conf": 11.953125, "calib/ece": 0.48815322580645165, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004930417197213655, "calib/mean_conf": 0.19329838709677422, "calib/mu_c": 0.1917278106508876, "calib/mu_w": 0.19665822784810125, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06643508593025099, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 600.625, "completions/mean_terminated_length": 605.3543090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.15466666666666667, "grad_norm": 0.08590273559093475, "learning_rate": 1.527777777777778e-06, "loss": -0.009, "num_tokens": 32339360.0, "reward": 1.2758750915527344, "reward_std": 0.3127176761627197, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.5175624489784241, "rewards/format_reward_step": 0.96484375, "step": 145 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.1917248227708583e-07, "aux_brier/mean_group_std": 0.025748211011913675, "aux_brier/mean_r": 0.9714801237499964, "aux_brier/n_active_tok": 360.625, "aux_brier/n_groups": 23.90625, "aux_brier/n_step_records": 90.15625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5339159663865547, "calib/avg_num_step_conf": 12.296875, "calib/ece": 0.31854098360655736, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.00819672131147541, "calib/gap": -0.010017613445378243, "calib/mean_conf": 0.1930983606557377, "calib/mu_c": 0.18796638655462183, "calib/mu_w": 0.19798400000000008, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011967213114754094, "calib/std_conf": 0.09245587426354877, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2554.0, "completions/max_terminated_length": 2554.0, "completions/mean_length": 603.58984375, "completions/mean_terminated_length": 618.0760498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.15573333333333333, "grad_norm": 0.2840138077735901, "learning_rate": 1.5e-06, "loss": 0.0313, "num_tokens": 32601095.0, "reward": 1.0962424278259277, "reward_std": 0.2664157450199127, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6193447113037109, "rewards/format_reward_step": 0.953125, "step": 146 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.304601738640667e-07, "aux_brier/mean_group_std": 0.03671174738710874, "aux_brier/mean_r": 0.9587684823999163, "aux_brier/n_active_tok": 382.375, "aux_brier/n_groups": 22.71875, "aux_brier/n_step_records": 95.59375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5988041370394311, "calib/avg_num_step_conf": 12.59375, "calib/ece": 0.28570281124497987, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015285714285714264, "calib/mean_conf": 0.19630522088353416, "calib/mu_c": 0.20428571428571426, "calib/mu_w": 0.189, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0020481927710843373, "calib/std_conf": 0.061311005295401386, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 626.09375, "completions/mean_terminated_length": 631.0236206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.1568, "grad_norm": 0.12055756896734238, "learning_rate": 1.4722222222222225e-06, "loss": 0.0216, "num_tokens": 32865055.0, "reward": 1.109532356262207, "reward_std": 0.22169142961502075, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6490668058395386, "rewards/format_reward_step": 0.96484375, "step": 147 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.684366448186154e-09, "aux_brier/mean_group_std": 0.04558691229013127, "aux_brier/mean_r": 0.9565527625199103, "aux_brier/n_active_tok": 327.375, "aux_brier/n_groups": 19.5, "aux_brier/n_step_records": 81.84375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.3752980132450331, "calib/avg_num_step_conf": 10.38671875, "calib/ece": 0.41633266932270924, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02939804635761592, "calib/mean_conf": 0.19916932270916335, "calib/mu_c": 0.1874569536423841, "calib/mu_w": 0.21685500000000002, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006954183266932271, "calib/std_conf": 0.06896771731045896, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2561.0, "completions/max_terminated_length": 2561.0, "completions/mean_length": 577.5703125, "completions/mean_terminated_length": 579.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.15786666666666666, "grad_norm": 0.12901774048805237, "learning_rate": 1.4444444444444445e-06, "loss": 0.0563, "num_tokens": 33118025.0, "reward": 1.2113531827926636, "reward_std": 0.2812713086605072, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5563505291938782, "rewards/format_reward_step": 0.96484375, "step": 148 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.896290706213335e-07, "aux_brier/mean_group_std": 0.04546189259266168, "aux_brier/mean_r": 0.948554689743723, "aux_brier/n_active_tok": 390.375, "aux_brier/n_groups": 26.9375, "aux_brier/n_step_records": 97.59375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.39986856668511345, "calib/avg_num_step_conf": 12.58984375, "calib/ece": 0.3802798353909465, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.00411522633744856, "calib/gap": -0.020164775871610446, "calib/mean_conf": 0.2031769547325103, "calib/mu_c": 0.19454676258992804, "calib/mu_w": 0.21471153846153848, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005720164609053498, "calib/std_conf": 0.07810664311931681, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2563.0, "completions/max_terminated_length": 2563.0, "completions/mean_length": 646.76171875, "completions/mean_terminated_length": 657.02783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.15893333333333334, "grad_norm": 0.10148246586322784, "learning_rate": 1.4166666666666667e-06, "loss": 0.0616, "num_tokens": 33388052.0, "reward": 1.154981255531311, "reward_std": 0.27073484659194946, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5652374625205994, "rewards/format_reward_step": 0.94140625, "step": 149 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.473403036886481e-08, "aux_brier/mean_group_std": 0.05383136089239446, "aux_brier/mean_r": 0.9481860467106232, "aux_brier/n_active_tok": 336.25, "aux_brier/n_groups": 22.46875, "aux_brier/n_step_records": 84.0625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.47717687074829923, "calib/avg_num_step_conf": 10.9453125, "calib/ece": 0.40201510204081625, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.005967142857142893, "calib/mean_conf": 0.18696448979591834, "calib/mu_c": 0.18440714285714283, "calib/mu_w": 0.19037428571428572, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008775510204081634, "calib/std_conf": 0.0733140569270794, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 577.734375, "completions/mean_terminated_length": 582.283447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.16, "grad_norm": 0.10174485296010971, "learning_rate": 1.3888888888888892e-06, "loss": 0.0685, "num_tokens": 33640912.0, "reward": 1.1664072275161743, "reward_std": 0.2701171040534973, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5718789100646973, "rewards/format_reward_step": 0.953125, "step": 150 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.223346269898286e-07, "aux_brier/mean_group_std": 0.052152782577405195, "aux_brier/mean_r": 0.9439619393993495, "aux_brier/n_active_tok": 401.875, "aux_brier/n_groups": 25.6875, "aux_brier/n_step_records": 100.46875, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4124579124579124, "calib/avg_num_step_conf": 12.8125, "calib/ece": 0.2639591836734694, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.018569023569023596, "calib/mean_conf": 0.2079591836734694, "calib/mu_c": 0.1977272727272727, "calib/mu_w": 0.2162962962962963, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011469387755102041, "calib/std_conf": 0.06867788141208185, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2857.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 663.21875, "completions/mean_terminated_length": 671.0830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.16106666666666666, "grad_norm": 0.1616038680076599, "learning_rate": 1.3611111111111112e-06, "loss": 0.0596, "num_tokens": 33917720.0, "reward": 1.0691440105438232, "reward_std": 0.26870036125183105, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6437633037567139, "rewards/format_reward_step": 0.94921875, "step": 151 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.653142637398247e-07, "aux_brier/mean_group_std": 0.02575822913121207, "aux_brier/mean_r": 0.9665353932893623, "aux_brier/n_active_tok": 377.625, "aux_brier/n_groups": 24.8125, "aux_brier/n_step_records": 94.40625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.3994061123715468, "calib/avg_num_step_conf": 12.33984375, "calib/ece": 0.3063346938775511, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01923955691979179, "calib/mean_conf": 0.19448163265306123, "calib/mu_c": 0.18450847457627118, "calib/mu_w": 0.20374803149606296, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009591836734693876, "calib/std_conf": 0.05143568075254085, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 609.06640625, "completions/mean_terminated_length": 618.7341918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.16213333333333332, "grad_norm": 0.20822611451148987, "learning_rate": 1.3333333333333334e-06, "loss": 0.0118, "num_tokens": 34179033.0, "reward": 1.0934128761291504, "reward_std": 0.3019166886806488, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.623651385307312, "rewards/format_reward_step": 0.953125, "step": 152 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.2212314958192092e-07, "aux_brier/mean_group_std": 0.06429451642092333, "aux_brier/mean_r": 0.9246247519055582, "aux_brier/n_active_tok": 378.125, "aux_brier/n_groups": 27.5625, "aux_brier/n_step_records": 94.53125, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.4944755680049798, "calib/avg_num_step_conf": 12.41796875, "calib/ece": 0.4628143459915611, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011530345471521986, "calib/mean_conf": 0.2020801687763713, "calib/mu_c": 0.19799346405228754, "calib/mu_w": 0.20952380952380953, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.009662447257383962, "calib/std_conf": 0.06340212154183225, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 635.62109375, "completions/mean_terminated_length": 650.8760375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.1632, "grad_norm": 0.10940448939800262, "learning_rate": 1.3055555555555556e-06, "loss": 0.0031, "num_tokens": 34449072.0, "reward": 1.2001734972000122, "reward_std": 0.28632062673568726, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.5194436311721802, "rewards/format_reward_step": 0.921875, "step": 153 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.324293562789187e-07, "aux_brier/mean_group_std": 0.045935056409833495, "aux_brier/mean_r": 0.9476644687064727, "aux_brier/n_active_tok": 368.75, "aux_brier/n_groups": 25.84375, "aux_brier/n_step_records": 92.1875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5990069314849374, "calib/avg_num_step_conf": 11.59765625, "calib/ece": 0.3197551020408163, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": 0.00954578778992271, "calib/mean_conf": 0.20106122448979594, "calib/mu_c": 0.2058925619834711, "calib/mu_w": 0.1963467741935484, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01346938775510204, "calib/std_conf": 0.08182240802887362, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 621.921875, "completions/mean_terminated_length": 624.36083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.16426666666666667, "grad_norm": 0.15322059392929077, "learning_rate": 1.2777777777777779e-06, "loss": 0.1123, "num_tokens": 34712724.0, "reward": 1.106763243675232, "reward_std": 0.31117719411849976, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6301780343055725, "rewards/format_reward_step": 0.953125, "step": 154 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.1865451843859915e-07, "aux_brier/mean_group_std": 0.05433175965076657, "aux_brier/mean_r": 0.9470791553939181, "aux_brier/n_active_tok": 332.625, "aux_brier/n_groups": 19.4375, "aux_brier/n_step_records": 83.15625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6190704820434031, "calib/avg_num_step_conf": 10.59765625, "calib/ece": 0.32160799999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02423365981691311, "calib/mean_conf": 0.186392, "calib/mu_c": 0.19831496062992127, "calib/mu_w": 0.17408130081300816, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.059302262486350385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2693.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 545.7578125, "completions/mean_terminated_length": 550.0551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.16533333333333333, "grad_norm": 0.10123839974403381, "learning_rate": 1.25e-06, "loss": 0.0165, "num_tokens": 34959654.0, "reward": 1.1443431377410889, "reward_std": 0.22528581321239471, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6398723721504211, "rewards/format_reward_step": 0.9765625, "step": 155 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.37029733688388e-07, "aux_brier/mean_group_std": 0.04280717767977099, "aux_brier/mean_r": 0.9580829089013806, "aux_brier/n_active_tok": 370.25, "aux_brier/n_groups": 21.28125, "aux_brier/n_step_records": 92.5625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.596854091610911, "calib/avg_num_step_conf": 11.61328125, "calib/ece": 0.265412, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015066778178075169, "calib/mean_conf": 0.198588, "calib/mu_c": 0.2066637931034483, "calib/mu_w": 0.19159701492537312, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05752657000030507, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 628.546875, "completions/mean_terminated_length": 628.546875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.1664, "grad_norm": 0.13852278888225555, "learning_rate": 1.2222222222222223e-06, "loss": 0.0179, "num_tokens": 35225322.0, "reward": 1.1086516380310059, "reward_std": 0.21563559770584106, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6689819097518921, "rewards/format_reward_step": 0.9765625, "step": 156 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.6852321416702694e-08, "aux_brier/mean_group_std": 0.05208226435626195, "aux_brier/mean_r": 0.9412569170187247, "aux_brier/n_active_tok": 400.125, "aux_brier/n_groups": 27.53125, "aux_brier/n_step_records": 100.03125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5289351851851851, "calib/avg_num_step_conf": 12.76171875, "calib/ece": 0.38207317073170727, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0033537581699346064, "calib/mean_conf": 0.20451219512195123, "calib/mu_c": 0.20590277777777777, "calib/mu_w": 0.20254901960784316, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006097560975609757, "calib/std_conf": 0.05819407757054948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 650.11328125, "completions/mean_terminated_length": 652.6627807617188, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.16746666666666668, "grad_norm": 0.0653674378991127, "learning_rate": 1.1944444444444446e-06, "loss": 0.022, "num_tokens": 35495479.0, "reward": 1.1867190599441528, "reward_std": 0.24630805850028992, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.58281409740448, "rewards/format_reward_step": 0.95703125, "step": 157 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.8494483211561885e-07, "aux_brier/mean_group_std": 0.050062138825723176, "aux_brier/mean_r": 0.9477031249893692, "aux_brier/n_active_tok": 348.125, "aux_brier/n_groups": 20.34375, "aux_brier/n_step_records": 87.03125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4556154282473058, "calib/avg_num_step_conf": 11.17578125, "calib/ece": 0.45511999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015852240499149184, "calib/mean_conf": 0.20088, "calib/mu_c": 0.19542682926829266, "calib/mu_w": 0.21127906976744185, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05442816917736624, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 567.84375, "completions/mean_terminated_length": 572.31494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.16853333333333334, "grad_norm": 0.10214134305715561, "learning_rate": 1.1666666666666668e-06, "loss": 0.0484, "num_tokens": 35746087.0, "reward": 1.2592241764068604, "reward_std": 0.24514839053153992, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.5368973016738892, "rewards/format_reward_step": 0.96875, "step": 158 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.022316010323436e-07, "aux_brier/mean_group_std": 0.03782838734823086, "aux_brier/mean_r": 0.9599033471502554, "aux_brier/n_active_tok": 342.0, "aux_brier/n_groups": 18.75, "aux_brier/n_step_records": 85.5, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.47769723092998956, "calib/avg_num_step_conf": 11.16015625, "calib/ece": 0.3485483870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.008064516129032258, "calib/gap": -0.0015303030303030485, "calib/mean_conf": 0.19443548387096773, "calib/mu_c": 0.193719696969697, "calib/mu_w": 0.19525000000000003, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005362903225806451, "calib/std_conf": 0.08709229827208749, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1856.0, "completions/max_terminated_length": 1856.0, "completions/mean_length": 561.015625, "completions/mean_terminated_length": 567.6680297851562, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1696, "grad_norm": 0.1387680023908615, "learning_rate": 1.138888888888889e-06, "loss": -0.0135, "num_tokens": 35994491.0, "reward": 1.1509547233581543, "reward_std": 0.18694695830345154, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6038192510604858, "rewards/format_reward_step": 0.9609375, "step": 159 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.000048038412586e-07, "aux_brier/mean_group_std": 0.026407065263686264, "aux_brier/mean_r": 0.9594201561051607, "aux_brier/n_active_tok": 385.75, "aux_brier/n_groups": 25.96875, "aux_brier/n_step_records": 96.4375, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.42458791208791213, "calib/avg_num_step_conf": 12.09765625, "calib/ece": 0.3847950819672131, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009203296703296726, "calib/mean_conf": 0.2035655737704918, "calib/mu_c": 0.19964285714285712, "calib/mu_w": 0.20884615384615385, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007295081967213114, "calib/std_conf": 0.05453221152239515, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2950.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 628.58984375, "completions/mean_terminated_length": 636.0435180664062, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.17066666666666666, "grad_norm": 0.12198327481746674, "learning_rate": 1.111111111111111e-06, "loss": 0.0637, "num_tokens": 36260250.0, "reward": 1.1690070629119873, "reward_std": 0.26782548427581787, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5822784900665283, "rewards/format_reward_step": 0.953125, "step": 160 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.144779553375775e-06, "aux_brier/mean_group_std": 0.048538273679804914, "aux_brier/mean_r": 0.9491158293442891, "aux_brier/n_active_tok": 321.0, "aux_brier/n_groups": 19.78125, "aux_brier/n_step_records": 80.25, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.48594527363184076, "calib/avg_num_step_conf": 10.23828125, "calib/ece": 0.5374898785425101, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035655058043117527, "calib/mean_conf": 0.19125506072874496, "calib/mu_c": 0.19222222222222218, "calib/mu_w": 0.18865671641791043, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.043178892127996645, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2804.0, "completions/max_terminated_length": 2804.0, "completions/mean_length": 567.4765625, "completions/mean_terminated_length": 571.9448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.17173333333333332, "grad_norm": 0.0686148852109909, "learning_rate": 1.0833333333333335e-06, "loss": 0.0419, "num_tokens": 36509444.0, "reward": 1.309281826019287, "reward_std": 0.22022128105163574, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.4949398338794708, "rewards/format_reward_step": 0.96484375, "step": 161 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.6172897535637887e-06, "aux_brier/mean_group_std": 0.055106406224832934, "aux_brier/mean_r": 0.9525150527892502, "aux_brier/n_active_tok": 324.125, "aux_brier/n_groups": 19.03125, "aux_brier/n_step_records": 81.03125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.3841981489910484, "calib/avg_num_step_conf": 10.79296875, "calib/ece": 0.4960526315789474, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.02704635108481268, "calib/mean_conf": 0.1961740890688259, "calib/mu_c": 0.18763313609467452, "calib/mu_w": 0.2146794871794872, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004008097165991903, "calib/std_conf": 0.06898000397257777, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2472.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 539.4921875, "completions/mean_terminated_length": 545.8893432617188, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.1728, "grad_norm": 0.34846580028533936, "learning_rate": 1.0555555555555557e-06, "loss": -0.0373, "num_tokens": 36751698.0, "reward": 1.2679799795150757, "reward_std": 0.2818097472190857, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.5094199180603027, "rewards/format_reward_step": 0.9609375, "step": 162 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.6125856217594787e-07, "aux_brier/mean_group_std": 0.03905567499231794, "aux_brier/mean_r": 0.949688208760553, "aux_brier/n_active_tok": 406.625, "aux_brier/n_groups": 25.1875, "aux_brier/n_step_records": 101.65625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.39600868856910126, "calib/avg_num_step_conf": 12.9921875, "calib/ece": 0.3180123456790124, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.018423296225902802, "calib/mean_conf": 0.21244032921810702, "calib/mu_c": 0.2036456692913386, "calib/mu_w": 0.2220689655172414, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003909465020576132, "calib/std_conf": 0.06151455663565264, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 677.015625, "completions/mean_terminated_length": 685.0435180664062, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.17386666666666667, "grad_norm": 0.07026407867670059, "learning_rate": 1.0277777777777777e-06, "loss": 0.0476, "num_tokens": 37029846.0, "reward": 1.1228903532028198, "reward_std": 0.29127073287963867, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.608748733997345, "rewards/format_reward_step": 0.94921875, "step": 163 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.855497080657472e-07, "aux_brier/mean_group_std": 0.03595025642705498, "aux_brier/mean_r": 0.9571039514589692, "aux_brier/n_active_tok": 391.875, "aux_brier/n_groups": 24.65625, "aux_brier/n_step_records": 97.96875, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.39746981413648075, "calib/avg_num_step_conf": 12.375, "calib/ece": 0.31181069958847724, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015326617826617844, "calib/mean_conf": 0.21106995884773666, "calib/mu_c": 0.2036904761904762, "calib/mu_w": 0.21901709401709404, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0021810699588477368, "calib/std_conf": 0.05238633828674187, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 691.46484375, "completions/mean_terminated_length": 696.909423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 230.0, "epoch": 0.17493333333333333, "grad_norm": 0.13067634403705597, "learning_rate": 1.0000000000000002e-06, "loss": 0.0226, "num_tokens": 37312997.0, "reward": 1.1199582815170288, "reward_std": 0.23064762353897095, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6126458644866943, "rewards/format_reward_step": 0.94921875, "step": 164 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.031915049473316e-07, "aux_brier/mean_group_std": 0.030710379266636553, "aux_brier/mean_r": 0.9616685494588795, "aux_brier/n_active_tok": 386.5, "aux_brier/n_groups": 22.5, "aux_brier/n_step_records": 96.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49996786838892104, "calib/avg_num_step_conf": 12.14453125, "calib/ece": 0.2575120000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006183792815371764, "calib/mean_conf": 0.210488, "calib/mu_c": 0.21377777777777776, "calib/mu_w": 0.207593984962406, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.056882491647254695, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2156.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 627.57421875, "completions/mean_terminated_length": 630.0353393554688, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.176, "grad_norm": 0.06213150545954704, "learning_rate": 9.722222222222224e-07, "loss": 0.0006, "num_tokens": 37579232.0, "reward": 1.1124401092529297, "reward_std": 0.24169263243675232, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.668510913848877, "rewards/format_reward_step": 0.9765625, "step": 165 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.261744837117057e-07, "aux_brier/mean_group_std": 0.05133524416069172, "aux_brier/mean_r": 0.9443120632087336, "aux_brier/n_active_tok": 389.375, "aux_brier/n_groups": 23.28125, "aux_brier/n_step_records": 97.34375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.42995267072346177, "calib/avg_num_step_conf": 12.30859375, "calib/ece": 0.40340080971659914, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.031599729546991195, "calib/mean_conf": 0.21870445344129552, "calib/mu_c": 0.2056551724137931, "calib/mu_w": 0.2372549019607843, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017530364372469638, "calib/std_conf": 0.0740591574761541, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2475.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 677.39453125, "completions/mean_terminated_length": 680.051025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.17706666666666668, "grad_norm": 0.09717828780412674, "learning_rate": 9.444444444444445e-07, "loss": 0.0779, "num_tokens": 37858829.0, "reward": 1.1948519945144653, "reward_std": 0.2601965367794037, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5762829780578613, "rewards/format_reward_step": 0.9609375, "step": 166 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.001674133638275e-06, "aux_brier/mean_group_std": 0.047615831888514336, "aux_brier/mean_r": 0.9482347667715699, "aux_brier/n_active_tok": 377.125, "aux_brier/n_groups": 24.625, "aux_brier/n_step_records": 94.28125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6770378078390564, "calib/avg_num_step_conf": 11.95703125, "calib/ece": 0.42252016129032255, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.012096774193548387, "calib/gap": 0.04104301075268815, "calib/mean_conf": 0.21715725806451613, "calib/mu_c": 0.23254838709677417, "calib/mu_w": 0.19150537634408601, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0073387096774193555, "calib/std_conf": 0.10760442915199611, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 602.26953125, "completions/mean_terminated_length": 607.0117797851562, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.17813333333333334, "grad_norm": 0.16451962292194366, "learning_rate": 9.166666666666666e-07, "loss": 0.0027, "num_tokens": 38118618.0, "reward": 1.2368392944335938, "reward_std": 0.2707372307777405, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5879822969436646, "rewards/format_reward_step": 0.96875, "step": 167 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.3709917139337335e-06, "aux_brier/mean_group_std": 0.05347549880297483, "aux_brier/mean_r": 0.9434481294100598, "aux_brier/n_active_tok": 399.125, "aux_brier/n_groups": 25.875, "aux_brier/n_step_records": 99.78125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.41141070467993546, "calib/avg_num_step_conf": 12.8984375, "calib/ece": 0.3626518218623482, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01445367132867137, "calib/mean_conf": 0.21629554655870445, "calib/mu_c": 0.2102097902097902, "calib/mu_w": 0.22466346153846156, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06087205088837687, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2924.0, "completions/max_terminated_length": 2924.0, "completions/mean_length": 676.53125, "completions/mean_terminated_length": 687.2698974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.1792, "grad_norm": 0.06556492298841476, "learning_rate": 8.88888888888889e-07, "loss": 0.0276, "num_tokens": 38396482.0, "reward": 1.1862057447433472, "reward_std": 0.2864423394203186, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5885733366012573, "rewards/format_reward_step": 0.9609375, "step": 168 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.4825908009052924e-06, "aux_brier/mean_group_std": 0.04257043861768157, "aux_brier/mean_r": 0.9512754169066496, "aux_brier/n_active_tok": 349.5, "aux_brier/n_groups": 19.875, "aux_brier/n_step_records": 87.375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5556414739107046, "calib/avg_num_step_conf": 11.01171875, "calib/ece": 0.3867611336032389, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00040209790209799756, "calib/mean_conf": 0.20619433198380568, "calib/mu_c": 0.20636363636363642, "calib/mu_w": 0.20596153846153842, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0070040485829959516, "calib/std_conf": 0.06392754856749398, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 620.5234375, "completions/mean_terminated_length": 627.8814697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.18026666666666666, "grad_norm": 0.10657239705324173, "learning_rate": 8.611111111111112e-07, "loss": 0.0349, "num_tokens": 38659520.0, "reward": 1.1889736652374268, "reward_std": 0.22296060621738434, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5918323993682861, "rewards/format_reward_step": 0.96484375, "step": 169 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.332860124270141e-06, "aux_brier/mean_group_std": 0.042277673380353306, "aux_brier/mean_r": 0.9467389730056519, "aux_brier/n_active_tok": 385.0, "aux_brier/n_groups": 24.53125, "aux_brier/n_step_records": 96.25, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5693082788671024, "calib/avg_num_step_conf": 12.48828125, "calib/ece": 0.377479674796748, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": 0.005894607843137251, "calib/mean_conf": 0.2160975609756098, "calib/mu_c": 0.21854166666666666, "calib/mu_w": 0.2126470588235294, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00410569105691057, "calib/std_conf": 0.07053954834155107, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2687.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 647.04296875, "completions/mean_terminated_length": 652.1378173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.18133333333333335, "grad_norm": 0.21552686393260956, "learning_rate": 8.333333333333333e-07, "loss": 0.0343, "num_tokens": 38929315.0, "reward": 1.1894385814666748, "reward_std": 0.258508563041687, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5936917662620544, "rewards/format_reward_step": 0.95703125, "step": 170 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.7282674045780997e-06, "aux_brier/mean_group_std": 0.04000196243230344, "aux_brier/mean_r": 0.9509666472589995, "aux_brier/n_active_tok": 379.875, "aux_brier/n_groups": 21.8125, "aux_brier/n_step_records": 94.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4397070688264134, "calib/avg_num_step_conf": 11.88671875, "calib/ece": 0.31183858267716535, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.013038105877241996, "calib/mean_conf": 0.2126496062992126, "calib/mu_c": 0.2063358778625954, "calib/mu_w": 0.2193739837398374, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004370078740157475, "calib/std_conf": 0.055578058002606234, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 630.234375, "completions/mean_terminated_length": 630.234375, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.1824, "grad_norm": 0.05583479255437851, "learning_rate": 8.055555555555557e-07, "loss": 0.0442, "num_tokens": 39197551.0, "reward": 1.1687397956848145, "reward_std": 0.21178996562957764, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6437092423439026, "rewards/format_reward_step": 0.9921875, "step": 171 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.689906251715726e-07, "aux_brier/mean_group_std": 0.03973866861127699, "aux_brier/mean_r": 0.9531008431630384, "aux_brier/n_active_tok": 344.375, "aux_brier/n_groups": 22.15625, "aux_brier/n_step_records": 86.09375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.42074484456755923, "calib/avg_num_step_conf": 11.49609375, "calib/ece": 0.5115991902834007, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.025467836257309945, "calib/mean_conf": 0.1976315789473684, "calib/mu_c": 0.18979532163742688, "calib/mu_w": 0.21526315789473682, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008461538461538461, "calib/std_conf": 0.06319904529861764, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 587.79296875, "completions/mean_terminated_length": 599.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.18346666666666667, "grad_norm": 0.047075413167476654, "learning_rate": 7.777777777777779e-07, "loss": -0.0212, "num_tokens": 39451378.0, "reward": 1.2815196514129639, "reward_std": 0.20947349071502686, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.5088908672332764, "rewards/format_reward_step": 0.96484375, "step": 172 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.859709276804907e-08, "aux_brier/mean_group_std": 0.05299500299835104, "aux_brier/mean_r": 0.9444791571380627, "aux_brier/n_active_tok": 388.75, "aux_brier/n_groups": 24.46875, "aux_brier/n_step_records": 97.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4778145695364238, "calib/avg_num_step_conf": 12.34765625, "calib/ece": 0.3916334661354582, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001389403973509895, "calib/mean_conf": 0.21896414342629483, "calib/mu_c": 0.2184105960264901, "calib/mu_w": 0.2198, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00450199203187251, "calib/std_conf": 0.07545692624464759, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2867.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 636.6171875, "completions/mean_terminated_length": 639.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.18453333333333333, "grad_norm": 0.07081261277198792, "learning_rate": 7.5e-07, "loss": 0.0694, "num_tokens": 39717512.0, "reward": 1.2260929346084595, "reward_std": 0.2734184265136719, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5918715000152588, "rewards/format_reward_step": 0.9765625, "step": 173 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.210774001231712e-07, "aux_brier/mean_group_std": 0.04575014371484926, "aux_brier/mean_r": 0.9496463163006874, "aux_brier/n_active_tok": 393.875, "aux_brier/n_groups": 25.375, "aux_brier/n_step_records": 98.46875, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5674525434033643, "calib/avg_num_step_conf": 12.84765625, "calib/ece": 0.25828278688524586, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010952442072552887, "calib/mean_conf": 0.20999590163934428, "calib/mu_c": 0.21587610619469028, "calib/mu_w": 0.2049236641221374, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002581967213114754, "calib/std_conf": 0.06193799768295656, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2814.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 693.7109375, "completions/mean_terminated_length": 704.7222900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1856, "grad_norm": 0.08657406270503998, "learning_rate": 7.222222222222222e-07, "loss": 0.0234, "num_tokens": 39999334.0, "reward": 1.0763218402862549, "reward_std": 0.39385682344436646, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6490377187728882, "rewards/format_reward_step": 0.9453125, "step": 174 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.139712241890692e-07, "aux_brier/mean_group_std": 0.03608444217310498, "aux_brier/mean_r": 0.9460198432192941, "aux_brier/n_active_tok": 443.625, "aux_brier/n_groups": 28.46875, "aux_brier/n_step_records": 110.90625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.47785853691365504, "calib/avg_num_step_conf": 14.3359375, "calib/ece": 0.26918852459016396, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009607645198196374, "calib/mean_conf": 0.22098360655737706, "calib/mu_c": 0.21598290598290598, "calib/mu_w": 0.22559055118110236, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005331967213114754, "calib/std_conf": 0.07520861517691181, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 701.9765625, "completions/mean_terminated_length": 713.1190795898438, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.18666666666666668, "grad_norm": 0.061242636293172836, "learning_rate": 6.944444444444446e-07, "loss": 0.0908, "num_tokens": 40284864.0, "reward": 1.0939886569976807, "reward_std": 0.2849002480506897, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6415797472000122, "rewards/format_reward_step": 0.953125, "step": 175 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.099879021856237e-06, "aux_brier/mean_group_std": 0.0736976996091162, "aux_brier/mean_r": 0.9345000241336686, "aux_brier/n_active_tok": 392.875, "aux_brier/n_groups": 25.84375, "aux_brier/n_step_records": 98.21875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5523860303918956, "calib/avg_num_step_conf": 12.9375, "calib/ece": 0.28895102040816323, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018313782991202232, "calib/mean_conf": 0.20843673469387755, "calib/mu_c": 0.2093636363636364, "calib/mu_w": 0.20753225806451617, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0017551020408163266, "calib/std_conf": 0.053229049510457684, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 633.68359375, "completions/mean_terminated_length": 643.7421264648438, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.18773333333333334, "grad_norm": 0.24485737085342407, "learning_rate": 6.666666666666667e-07, "loss": 0.0103, "num_tokens": 40551151.0, "reward": 1.1106715202331543, "reward_std": 0.2508005201816559, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6379984617233276, "rewards/format_reward_step": 0.95703125, "step": 176 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.6494836561383757e-07, "aux_brier/mean_group_std": 0.04556374996161313, "aux_brier/mean_r": 0.9522006859987248, "aux_brier/n_active_tok": 387.25, "aux_brier/n_groups": 23.1875, "aux_brier/n_step_records": 96.8125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48005679617916613, "calib/avg_num_step_conf": 12.47265625, "calib/ece": 0.30024497991967874, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00039802504195174393, "calib/mean_conf": 0.21029317269076306, "calib/mu_c": 0.21048818897637794, "calib/mu_w": 0.2100901639344262, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00024899598393574295, "calib/std_conf": 0.061485545315180516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2973.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 649.671875, "completions/mean_terminated_length": 652.2196655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.1888, "grad_norm": 0.12950608134269714, "learning_rate": 6.388888888888889e-07, "loss": 0.0967, "num_tokens": 40821299.0, "reward": 1.1421005725860596, "reward_std": 0.2758725881576538, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6387151479721069, "rewards/format_reward_step": 0.97265625, "step": 177 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0601387265829487e-06, "aux_brier/mean_group_std": 0.04761884644367766, "aux_brier/mean_r": 0.9442871713716057, "aux_brier/n_active_tok": 383.5, "aux_brier/n_groups": 25.21875, "aux_brier/n_step_records": 95.875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.36010623026126903, "calib/avg_num_step_conf": 12.00390625, "calib/ece": 0.4411370967741936, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02323887453344814, "calib/mean_conf": 0.21275, "calib/mu_c": 0.20469135802469135, "calib/mu_w": 0.2279302325581395, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003306451612903226, "calib/std_conf": 0.05673966084586006, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 617.62109375, "completions/mean_terminated_length": 620.0431518554688, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.18986666666666666, "grad_norm": 0.18901731073856354, "learning_rate": 6.111111111111112e-07, "loss": 0.079, "num_tokens": 41085482.0, "reward": 1.2486571073532104, "reward_std": 0.29391467571258545, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.5415034294128418, "rewards/format_reward_step": 0.9609375, "step": 178 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.0484976412263896e-07, "aux_brier/mean_group_std": 0.04620241091579395, "aux_brier/mean_r": 0.9442976833248373, "aux_brier/n_active_tok": 393.125, "aux_brier/n_groups": 25.25, "aux_brier/n_step_records": 98.28125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4868718238283456, "calib/avg_num_step_conf": 12.74609375, "calib/ece": 0.42495121951219517, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": -0.007903444381705205, "calib/mean_conf": 0.2137479674796748, "calib/mu_c": 0.2107922077922078, "calib/mu_w": 0.218695652173913, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006341463414634147, "calib/std_conf": 0.07836967779892862, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2895.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 633.45703125, "completions/mean_terminated_length": 643.511962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.19093333333333334, "grad_norm": 0.14594194293022156, "learning_rate": 5.833333333333334e-07, "loss": -0.0126, "num_tokens": 41353911.0, "reward": 1.2267322540283203, "reward_std": 0.31886544823646545, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5631790161132812, "rewards/format_reward_step": 0.9609375, "step": 179 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8389634481863482e-07, "aux_brier/mean_group_std": 0.056880049986435574, "aux_brier/mean_r": 0.9370285953179978, "aux_brier/n_active_tok": 424.875, "aux_brier/n_groups": 23.9375, "aux_brier/n_step_records": 106.21875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5114116652578191, "calib/avg_num_step_conf": 13.40625, "calib/ece": 0.401574008097166, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001350402930402922, "calib/mean_conf": 0.23421546558704454, "calib/mu_c": 0.23371794871794868, "calib/mu_w": 0.2350683516483516, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002105263157894737, "calib/std_conf": 0.07039990629943667, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2948.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 749.87109375, "completions/mean_terminated_length": 755.7755737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.192, "grad_norm": 0.11435962468385696, "learning_rate": 5.555555555555555e-07, "loss": 0.0017, "num_tokens": 41649734.0, "reward": 1.2317256927490234, "reward_std": 0.26203978061676025, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.5753401517868042, "rewards/format_reward_step": 0.95703125, "step": 180 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.961547231894457e-07, "aux_brier/mean_group_std": 0.031881441830085566, "aux_brier/mean_r": 0.9558846945465009, "aux_brier/n_active_tok": 395.375, "aux_brier/n_groups": 25.28125, "aux_brier/n_step_records": 98.84375, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5390349688262402, "calib/avg_num_step_conf": 12.77734375, "calib/ece": 0.28520164609053494, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007824207102195724, "calib/mean_conf": 0.21084773662551443, "calib/mu_c": 0.2148403361344538, "calib/mu_w": 0.20701612903225808, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0031687242798353913, "calib/std_conf": 0.06076675254645212, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 621.98828125, "completions/mean_terminated_length": 629.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.19306666666666666, "grad_norm": 0.06092178449034691, "learning_rate": 5.277777777777779e-07, "loss": 0.0464, "num_tokens": 41915227.0, "reward": 1.093285322189331, "reward_std": 0.30635935068130493, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6309539079666138, "rewards/format_reward_step": 0.94140625, "step": 181 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.831385654908594e-08, "aux_brier/mean_group_std": 0.052071621815678024, "aux_brier/mean_r": 0.946060715115527, "aux_brier/n_active_tok": 421.375, "aux_brier/n_groups": 27.34375, "aux_brier/n_step_records": 105.34375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4750526315789474, "calib/avg_num_step_conf": 13.19140625, "calib/ece": 0.4024897959183673, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.012244897959183673, "calib/gap": -0.008835087719298224, "calib/mean_conf": 0.2279591836734694, "calib/mu_c": 0.22453333333333333, "calib/mu_w": 0.23336842105263156, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00910204081632653, "calib/std_conf": 0.09911402610509867, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 700.98828125, "completions/mean_terminated_length": 703.7373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.19413333333333332, "grad_norm": 0.0773182213306427, "learning_rate": 5.000000000000001e-07, "loss": 0.0802, "num_tokens": 42200840.0, "reward": 1.202471375465393, "reward_std": 0.2898944020271301, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.567698061466217, "rewards/format_reward_step": 0.94921875, "step": 182 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.7296919236908792e-06, "aux_brier/mean_group_std": 0.04741884363906103, "aux_brier/mean_r": 0.9460917643336427, "aux_brier/n_active_tok": 437.25, "aux_brier/n_groups": 28.03125, "aux_brier/n_step_records": 109.3125, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.45679527285366706, "calib/avg_num_step_conf": 14.11328125, "calib/ece": 0.3562107438016529, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0169420229405631, "calib/mean_conf": 0.22472314049586775, "calib/mu_c": 0.2173722627737226, "calib/mu_w": 0.2343142857142857, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0074090909090909094, "calib/std_conf": 0.08235255921932236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2757.0, "completions/max_terminated_length": 2757.0, "completions/mean_length": 707.265625, "completions/mean_terminated_length": 715.6522216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.1952, "grad_norm": 0.08158347010612488, "learning_rate": 4.7222222222222226e-07, "loss": 0.0185, "num_tokens": 42488580.0, "reward": 1.1549781560897827, "reward_std": 0.3270761966705322, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5886627435684204, "rewards/format_reward_step": 0.9453125, "step": 183 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.581238692205389e-07, "aux_brier/mean_group_std": 0.05508480402311649, "aux_brier/mean_r": 0.9444662865142436, "aux_brier/n_active_tok": 427.625, "aux_brier/n_groups": 25.4375, "aux_brier/n_step_records": 106.90625, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.48194875776397517, "calib/avg_num_step_conf": 13.40625, "calib/ece": 0.4503900414937759, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012860248447205125, "calib/mean_conf": 0.21819087136929463, "calib/mu_c": 0.2177639751552795, "calib/mu_w": 0.21905000000000002, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00026556016597510375, "calib/std_conf": 0.05177942444964169, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2749.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 696.00390625, "completions/mean_terminated_length": 698.7333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.19626666666666667, "grad_norm": 0.10481294989585876, "learning_rate": 4.444444444444445e-07, "loss": 0.0754, "num_tokens": 42772037.0, "reward": 1.2343754768371582, "reward_std": 0.3066892921924591, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.5390644073486328, "rewards/format_reward_step": 0.94140625, "step": 184 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.824576012729125e-07, "aux_brier/mean_group_std": 0.058168486918124304, "aux_brier/mean_r": 0.9290612995322908, "aux_brier/n_active_tok": 487.0, "aux_brier/n_groups": 34.53125, "aux_brier/n_step_records": 121.75, "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.5088359585094122, "calib/avg_num_step_conf": 16.77734375, "calib/ece": 0.38885108620689657, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003413694967346337, "calib/mean_conf": 0.2167821206896552, "calib/mu_c": 0.21664233576642333, "calib/mu_w": 0.21698370526315797, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00755798275862069, "calib/std_conf": 0.06162091893672249, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 695.12109375, "completions/mean_terminated_length": 726.33056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 267.0, "epoch": 0.19733333333333333, "grad_norm": 0.09712018817663193, "learning_rate": 4.1666666666666667e-07, "loss": 0.0305, "num_tokens": 43056908.0, "reward": 1.1246925592422485, "reward_std": 0.27672067284584045, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5534579753875732, "rewards/format_reward_step": 0.90234375, "step": 185 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.009602786047566e-08, "aux_brier/mean_group_std": 0.048765210204440045, "aux_brier/mean_r": 0.9480614807649559, "aux_brier/n_active_tok": 419.25, "aux_brier/n_groups": 26.15625, "aux_brier/n_step_records": 104.8125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4258877840909091, "calib/avg_num_step_conf": 14.03515625, "calib/ece": 0.43604435483870974, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.028473863636363672, "calib/mean_conf": 0.22589112903225808, "calib/mu_c": 0.21578749999999997, "calib/mu_w": 0.24426136363636364, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00838709677419355, "calib/std_conf": 0.08466852017369092, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 652.296875, "completions/mean_terminated_length": 662.6508178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.1984, "grad_norm": 0.12557275593280792, "learning_rate": 3.8888888888888895e-07, "loss": 0.0041, "num_tokens": 43328936.0, "reward": 1.243396520614624, "reward_std": 0.2822433114051819, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.5517109632492065, "rewards/format_reward_step": 0.9609375, "step": 186 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.755831485818817e-07, "aux_brier/mean_group_std": 0.04175361480822199, "aux_brier/mean_r": 0.9410636697086978, "aux_brier/n_active_tok": 524.25, "aux_brier/n_groups": 38.84375, "aux_brier/n_step_records": 131.0625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4454494656701823, "calib/avg_num_step_conf": 16.7265625, "calib/ece": 0.33743801652892563, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.008264462809917356, "calib/gap": -0.003020185793113167, "calib/mean_conf": 0.24214876033057853, "calib/mu_c": 0.24086330935251796, "calib/mu_w": 0.24388349514563112, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002603305785123967, "calib/std_conf": 0.10650112800838431, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2782.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 764.265625, "completions/mean_terminated_length": 773.3280639648438, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.19946666666666665, "grad_norm": 0.09417112171649933, "learning_rate": 3.611111111111111e-07, "loss": 0.0624, "num_tokens": 43626132.0, "reward": 1.1626980304718018, "reward_std": 0.3148071765899658, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5961047410964966, "rewards/format_reward_step": 0.94140625, "step": 187 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.413561161686346e-07, "aux_brier/mean_group_std": 0.04933843877518988, "aux_brier/mean_r": 0.9390276907970772, "aux_brier/n_active_tok": 446.375, "aux_brier/n_groups": 28.28125, "aux_brier/n_step_records": 111.59375, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.480017251293847, "calib/avg_num_step_conf": 14.62890625, "calib/ece": 0.4054462809917355, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016693358251868906, "calib/mean_conf": 0.22695041322314052, "calib/mu_c": 0.2204662162162162, "calib/mu_w": 0.2371595744680851, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.010413223140495868, "calib/std_conf": 0.07876052743378137, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2841.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 729.7578125, "completions/mean_terminated_length": 744.2948608398438, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.20053333333333334, "grad_norm": 0.08273537456989288, "learning_rate": 3.3333333333333335e-07, "loss": 0.0031, "num_tokens": 43917022.0, "reward": 1.1846611499786377, "reward_std": 0.2963374853134155, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5589576363563538, "rewards/format_reward_step": 0.93359375, "step": 188 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.4883606792859272e-07, "aux_brier/mean_group_std": 0.05791338370473411, "aux_brier/mean_r": 0.9341813753973884, "aux_brier/n_active_tok": 424.0, "aux_brier/n_groups": 25.375, "aux_brier/n_step_records": 106.0, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.44540510389067073, "calib/avg_num_step_conf": 13.6171875, "calib/ece": 0.3704115226337449, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.00823045267489712, "calib/gap": -0.017843397015757934, "calib/mean_conf": 0.2291769547325103, "calib/mu_c": 0.22176056338028166, "calib/mu_w": 0.2396039603960396, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007613168724279837, "calib/std_conf": 0.09103509455209602, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 682.359375, "completions/mean_terminated_length": 693.1904907226562, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.2016, "grad_norm": 0.05336812511086464, "learning_rate": 3.055555555555556e-07, "loss": -0.0115, "num_tokens": 44199474.0, "reward": 1.1750032901763916, "reward_std": 0.25221875309944153, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5828253626823425, "rewards/format_reward_step": 0.94921875, "step": 189 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.978675969286253e-07, "aux_brier/mean_group_std": 0.047272347596613716, "aux_brier/mean_r": 0.9375782702077352, "aux_brier/n_active_tok": 493.0, "aux_brier/n_groups": 31.375, "aux_brier/n_step_records": 123.25, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4364692138537658, "calib/avg_num_step_conf": 15.5234375, "calib/ece": 0.3354732510288066, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.00411522633744856, "calib/gap": -0.009278449697636082, "calib/mean_conf": 0.2345267489711934, "calib/mu_c": 0.2304411764705882, "calib/mu_w": 0.2397196261682243, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005164609053497943, "calib/std_conf": 0.08527540897015692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 770.83203125, "completions/mean_terminated_length": 773.8549194335938, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.20266666666666666, "grad_norm": 0.1245565190911293, "learning_rate": 2.7777777777777776e-07, "loss": 0.0666, "num_tokens": 44502415.0, "reward": 1.1539157629013062, "reward_std": 0.31633496284484863, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6000380516052246, "rewards/format_reward_step": 0.9453125, "step": 190 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.412974211724489e-08, "aux_brier/mean_group_std": 0.04102866641121818, "aux_brier/mean_r": 0.9444359094352115, "aux_brier/n_active_tok": 478.25, "aux_brier/n_groups": 30.6875, "aux_brier/n_step_records": 119.5625, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5599073414112616, "calib/avg_num_step_conf": 16.03515625, "calib/ece": 0.29742194092827007, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007707982893799009, "calib/mean_conf": 0.22662869198312235, "calib/mu_c": 0.23036885245901642, "calib/mu_w": 0.2226608695652174, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004641350210970464, "calib/std_conf": 0.0690357739126874, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 702.69921875, "completions/mean_terminated_length": 725.366943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.20373333333333332, "grad_norm": 0.14571070671081543, "learning_rate": 2.5000000000000004e-07, "loss": 0.0261, "num_tokens": 44786474.0, "reward": 1.0908126831054688, "reward_std": 0.3123927116394043, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6132504343986511, "rewards/format_reward_step": 0.921875, "step": 191 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.373964120924946e-07, "aux_brier/mean_group_std": 0.045150856460124775, "aux_brier/mean_r": 0.9374481307385238, "aux_brier/n_active_tok": 447.125, "aux_brier/n_groups": 32.125, "aux_brier/n_step_records": 111.78125, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.47491337491337493, "calib/avg_num_step_conf": 14.26953125, "calib/ece": 0.3504439834024896, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014719473319473347, "calib/mean_conf": 0.2134564315352697, "calib/mu_c": 0.20667692307692306, "calib/mu_w": 0.2213963963963964, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012240663900414936, "calib/std_conf": 0.07706309837292116, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 714.58984375, "completions/mean_terminated_length": 725.9325561523438, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.2048, "grad_norm": 0.10657009482383728, "learning_rate": 2.2222222222222224e-07, "loss": 0.1075, "num_tokens": 45074385.0, "reward": 1.1244527101516724, "reward_std": 0.3406021296977997, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5915606021881104, "rewards/format_reward_step": 0.9375, "step": 192 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.973558316885175e-07, "aux_brier/mean_group_std": 0.033618848938848846, "aux_brier/mean_r": 0.941780056931819, "aux_brier/n_active_tok": 509.875, "aux_brier/n_groups": 36.59375, "aux_brier/n_step_records": 127.46875, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.4246153846153846, "calib/avg_num_step_conf": 16.67578125, "calib/ece": 0.3357083333333333, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.025279720279720364, "calib/mean_conf": 0.225125, "calib/mu_c": 0.2135384615384615, "calib/mu_w": 0.23881818181818187, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009583333333333334, "calib/std_conf": 0.07517746365545107, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 738.125, "completions/mean_terminated_length": 755.8400268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 268.0, "epoch": 0.20586666666666667, "grad_norm": 0.0628858208656311, "learning_rate": 1.9444444444444447e-07, "loss": 0.1084, "num_tokens": 45369057.0, "reward": 1.119228720664978, "reward_std": 0.3596567213535309, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5862898230552673, "rewards/format_reward_step": 0.9296875, "step": 193 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.823576080723811e-07, "aux_brier/mean_group_std": 0.06014853987405492, "aux_brier/mean_r": 0.9392914650332841, "aux_brier/n_active_tok": 396.25, "aux_brier/n_groups": 26.15625, "aux_brier/n_step_records": 99.0625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6006480754124116, "calib/avg_num_step_conf": 12.50390625, "calib/ece": 0.3495564516129033, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.008064516129032258, "calib/gap": 0.017903901544907036, "calib/mean_conf": 0.20721774193548387, "calib/mu_c": 0.21544776119402986, "calib/mu_w": 0.19754385964912283, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008225806451612902, "calib/std_conf": 0.09362718281911017, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 622.40625, "completions/mean_terminated_length": 629.7865600585938, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.20693333333333333, "grad_norm": 0.029946941882371902, "learning_rate": 1.6666666666666668e-07, "loss": 0.0616, "num_tokens": 45634337.0, "reward": 1.163004994392395, "reward_std": 0.23555651307106018, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6207699179649353, "rewards/format_reward_step": 0.96875, "step": 194 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.687961891487348e-07, "aux_brier/mean_group_std": 0.04913224150212791, "aux_brier/mean_r": 0.9448929913266568, "aux_brier/n_active_tok": 430.0, "aux_brier/n_groups": 27.65625, "aux_brier/n_step_records": 107.5, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5977300120530333, "calib/avg_num_step_conf": 14.2734375, "calib/ece": 0.3194555102040817, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02282391857506355, "calib/mean_conf": 0.21593714285714286, "calib/mu_c": 0.22655725190839693, "calib/mu_w": 0.20373333333333338, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00034938775510204084, "calib/std_conf": 0.07311663011928271, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2646.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 663.22265625, "completions/mean_terminated_length": 676.4342651367188, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.208, "grad_norm": 0.07781007885932922, "learning_rate": 1.3888888888888888e-07, "loss": -0.0016, "num_tokens": 45910106.0, "reward": 1.1444282531738281, "reward_std": 0.3061702251434326, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.624588131904602, "rewards/format_reward_step": 0.953125, "step": 195 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.523369221218434e-07, "aux_brier/mean_group_std": 0.05294471690079702, "aux_brier/mean_r": 0.9497470029057982, "aux_brier/n_active_tok": 358.25, "aux_brier/n_groups": 19.90625, "aux_brier/n_step_records": 89.5625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49286221820295845, "calib/avg_num_step_conf": 11.2890625, "calib/ece": 0.351336, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004259543957108702, "calib/mean_conf": 0.205064, "calib/mu_c": 0.20313868613138686, "calib/mu_w": 0.20739823008849556, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004200000000000001, "calib/std_conf": 0.04522410755338351, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2715.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 550.87890625, "completions/mean_terminated_length": 553.0392456054688, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.20906666666666668, "grad_norm": 0.11793574690818787, "learning_rate": 1.1111111111111112e-07, "loss": -0.0065, "num_tokens": 46153675.0, "reward": 1.1744682788848877, "reward_std": 0.2142975777387619, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6119354963302612, "rewards/format_reward_step": 0.97265625, "step": 196 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.52652420484889e-07, "aux_brier/mean_group_std": 0.08507632149990002, "aux_brier/mean_r": 0.9213449414230831, "aux_brier/n_active_tok": 451.25, "aux_brier/n_groups": 27.78125, "aux_brier/n_step_records": 112.8125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.45739233193277307, "calib/avg_num_step_conf": 14.98828125, "calib/ece": 0.30197906882591097, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": -0.01607548319327734, "calib/mean_conf": 0.23105736842105265, "calib/mu_c": 0.22331250000000002, "calib/mu_w": 0.23938798319327736, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007408906882591094, "calib/std_conf": 0.10174426176240516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2619.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 679.02734375, "completions/mean_terminated_length": 689.8056030273438, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.21013333333333334, "grad_norm": 0.030979149043560028, "learning_rate": 8.333333333333334e-08, "loss": 0.005, "num_tokens": 46432562.0, "reward": 1.1333051919937134, "reward_std": 0.32122713327407837, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6191585063934326, "rewards/format_reward_step": 0.95703125, "step": 197 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.787927080889375e-07, "aux_brier/mean_group_std": 0.03967720219074483, "aux_brier/mean_r": 0.9549582358524856, "aux_brier/n_active_tok": 423.125, "aux_brier/n_groups": 26.3125, "aux_brier/n_step_records": 105.78125, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5554054054054054, "calib/avg_num_step_conf": 13.640625, "calib/ece": 0.39092181069958853, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008062731152204794, "calib/mean_conf": 0.21813168724279838, "calib/mu_c": 0.22128378378378377, "calib/mu_w": 0.21322105263157898, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06666135120810136, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2919.0, "completions/max_terminated_length": 2919.0, "completions/mean_length": 655.60546875, "completions/mean_terminated_length": 668.6653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.2112, "grad_norm": 0.11825759708881378, "learning_rate": 5.555555555555556e-08, "loss": 0.0297, "num_tokens": 46705781.0, "reward": 1.1942287683486938, "reward_std": 0.27700430154800415, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5737901926040649, "rewards/format_reward_step": 0.9453125, "step": 198 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.295434076547922e-07, "aux_brier/mean_group_std": 0.060451388675561456, "aux_brier/mean_r": 0.9310014651700576, "aux_brier/n_active_tok": 473.5, "aux_brier/n_groups": 31.375, "aux_brier/n_step_records": 118.375, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4061172161172161, "calib/avg_num_step_conf": 15.35546875, "calib/ece": 0.40435684647302905, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.028663736263736278, "calib/mean_conf": 0.22842323651452287, "calib/mu_c": 0.21760000000000002, "calib/mu_w": 0.2462637362637363, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0051867219917012455, "calib/std_conf": 0.07099765731263125, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 727.73828125, "completions/mean_terminated_length": 742.235107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.21226666666666666, "grad_norm": 0.0343790277838707, "learning_rate": 2.777777777777778e-08, "loss": 0.0531, "num_tokens": 46996282.0, "reward": 1.195791482925415, "reward_std": 0.3694891035556793, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5566034913063049, "rewards/format_reward_step": 0.94140625, "step": 199 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1390579573999915e-07, "aux_brier/mean_group_std": 0.05422818649641198, "aux_brier/mean_r": 0.9449914681356391, "aux_brier/n_active_tok": 444.875, "aux_brier/n_groups": 28.96875, "aux_brier/n_step_records": 111.21875, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4531749491426911, "calib/avg_num_step_conf": 14.47265625, "calib/ece": 0.39196680497925307, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.008298755186721992, "calib/gap": -0.0005050857308921963, "calib/mean_conf": 0.22424896265560165, "calib/mu_c": 0.22405405405405404, "calib/mu_w": 0.22455913978494624, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001053941908713693, "calib/std_conf": 0.10013735509197372, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 704.6875, "completions/mean_terminated_length": 715.873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.21333333333333335, "grad_norm": 0.09823764115571976, "learning_rate": 0.0, "loss": 0.027, "num_tokens": 47284730.0, "reward": 1.1912832260131836, "reward_std": 0.23737585544586182, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5620080232620239, "rewards/format_reward_step": 0.9375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.04016249892651103, "train_runtime": 18559.3971, "train_samples_per_second": 2.759, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 47284730, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }