{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.1, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.3237912356853485, "learning_rate": 2.5000000000000004e-07, "loss": 0.0332, "num_tokens": 264685.0, "reward": 0.03929531201720238, "reward_std": 0.08434611558914185, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step_strict": 0.0390625, "step": 1 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006301195360720158, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step_strict": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": -5.62932740422184e-09, "aux_brier/mean_group_std": 0.04158255792864185, "aux_brier/mean_r": 0.3927569710388179, "aux_brier/n_active_tok": 24.0, "aux_brier/n_groups": 5.0, "aux_brier/n_step_records": 6.0, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.5208333333333333, "calib/avg_num_step_conf": 0.2421875, "calib/ece": 0.6027272727272728, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9090909090909091, "calib/gap": 0.12083333333333324, "calib/mean_conf": 0.8754545454545454, "calib/mu_c": 0.9633333333333333, "calib/mu_w": 0.8425, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.6027272727272728, "calib/std_conf": 0.2744039370365466, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 696.83203125, "completions/mean_terminated_length": 752.6961669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.003430852433666587, "learning_rate": 7.5e-07, "loss": 0.0347, "num_tokens": 817112.0, "reward": 0.033635444939136505, "reward_std": 0.07786141335964203, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.017354296520352364, "rewards/format_reward_step_strict": 0.03515625, "step": 3 }, { "aux_brier/lambda": 0.1, "aux_brier/loss": -1.676789982077218e-08, "aux_brier/mean_group_std": 0.0222393300341596, "aux_brier/mean_r": 0.5159534199701812, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.538461538461538, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.475, "calib/avg_num_step_conf": 0.31640625, "calib/ece": 0.7883333333333334, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": -0.03599999999999992, "calib/mean_conf": 0.9199999999999999, "calib/mu_c": 0.89, "calib/mu_w": 0.9259999999999999, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7708333333333335, "calib/std_conf": 0.10984838035522722, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 689.0546875, "completions/mean_terminated_length": 747.4491577148438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.004266666666666667, "grad_norm": 0.004446873906999826, "learning_rate": 1.0000000000000002e-06, "loss": 0.009, "num_tokens": 1099678.0, "reward": 0.036271486431360245, "reward_std": 0.08171947300434113, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.012273438274860382, "rewards/format_reward_step_strict": 0.04296875, "step": 4 }, { "aux_brier/lambda": 0.1, "aux_brier/loss": 1.6811566839491066e-09, "aux_brier/mean_group_std": 0.01697577418694997, "aux_brier/mean_r": 0.6002687683778114, "aux_brier/n_active_tok": 28.0, "aux_brier/n_groups": 6.6923076923076925, "aux_brier/n_step_records": 7.0, "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.35714285714285715, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.7699999999999999, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.07428571428571418, "calib/mean_conf": 0.895, "calib/mu_c": 0.96, "calib/mu_w": 0.8857142857142858, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7699999999999999, "calib/std_conf": 0.1606237840420901, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2968.0, "completions/max_terminated_length": 2968.0, "completions/mean_length": 719.578125, "completions/mean_terminated_length": 797.4545288085938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.0036794233601540327, "learning_rate": 1.25e-06, "loss": 0.0054, "num_tokens": 1390578.0, "reward": 0.027565428987145424, "reward_std": 0.07796680927276611, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.00869921874254942, "rewards/format_reward_step_strict": 0.02734375, "step": 5 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": 6.092032317495111e-08, "aux_brier/mean_group_std": 0.08271630606675115, "aux_brier/mean_r": 0.49483007053304573, "aux_brier/n_active_tok": 26.25, "aux_brier/n_groups": 5.5, "aux_brier/n_step_records": 6.5625, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.5641025641025641, "calib/avg_num_step_conf": 0.6953125, "calib/ece": 0.745625, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.02897435897435896, "calib/mean_conf": 0.933125, "calib/mu_c": 0.9566666666666667, "calib/mu_w": 0.9276923076923077, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.745625, "calib/std_conf": 0.04958814752539159, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 630.1640625, "completions/mean_terminated_length": 698.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.33460941910743713, "learning_rate": 1.5e-06, "loss": 0.0258, "num_tokens": 1657852.0, "reward": 0.04356689751148224, "reward_std": 0.09231738746166229, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01801757887005806, "rewards/format_reward_step_strict": 0.0546875, "step": 6 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": -1.59655296982919e-08, "aux_brier/mean_group_std": 0.03944458446963456, "aux_brier/mean_r": 0.4118567446097141, "aux_brier/n_active_tok": 24.0, "aux_brier/n_groups": 5.3, "aux_brier/n_step_records": 6.0, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.20370370370370372, "calib/avg_num_step_conf": 0.234375, "calib/ece": 0.7083333333333334, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": -0.028888888888888853, "calib/mean_conf": 0.9583333333333334, "calib/mu_c": 0.9366666666666666, "calib/mu_w": 0.9655555555555555, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.7083333333333334, "calib/std_conf": 0.02939198681424732, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 742.8125, "completions/mean_terminated_length": 826.7825927734375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.007466666666666667, "grad_norm": 0.002587189432233572, "learning_rate": 1.75e-06, "loss": 0.0202, "num_tokens": 1955436.0, "reward": 0.03569609299302101, "reward_std": 0.07640209794044495, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.00997187476605177, "rewards/format_reward_step_strict": 0.03515625, "step": 7 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": -6.6088248599151165e-09, "aux_brier/mean_group_std": 0.016897729228566256, "aux_brier/mean_r": 0.4548081345696824, "aux_brier/n_active_tok": 23.764705882352942, "aux_brier/n_groups": 5.411764705882353, "aux_brier/n_step_records": 5.9411764705882355, "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.39453125, "calib/ece": 0.5121041666666667, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.027035714285714385, "calib/mean_conf": 0.8162291666666667, "calib/mu_c": 0.8320000000000001, "calib/mu_w": 0.8049642857142857, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.4558333333333334, "calib/std_conf": 0.3177236082393609, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 602.73828125, "completions/mean_terminated_length": 662.2360229492188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008533333333333334, "grad_norm": 0.004288786556571722, "learning_rate": 2.0000000000000003e-06, "loss": 0.0139, "num_tokens": 2216249.0, "reward": 0.08339668065309525, "reward_std": 0.13205386698246002, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.036711715161800385, "rewards/format_reward_step_strict": 0.0703125, "step": 8 }, { "aux_brier/lambda": 0.1, "aux_brier/loss": -6.720305435946119e-08, "aux_brier/mean_group_std": 0.011471152313687427, "aux_brier/mean_r": 0.41468368217524537, "aux_brier/n_active_tok": 20.857142857142858, "aux_brier/n_groups": 4.571428571428571, "aux_brier/n_step_records": 5.214285714285714, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.6018518518518519, "calib/avg_num_step_conf": 0.2890625, "calib/ece": 0.5273333333333334, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.8666666666666667, "calib/gap": 0.060000000000000164, "calib/mean_conf": 0.9273333333333335, "calib/mu_c": 0.9633333333333334, "calib/mu_w": 0.9033333333333332, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.5273333333333334, "calib/std_conf": 0.1376694914964419, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 663.39453125, "completions/mean_terminated_length": 748.1453247070312, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0096, "grad_norm": 0.004142031539231539, "learning_rate": 2.25e-06, "loss": 0.0572, "num_tokens": 2493614.0, "reward": 0.05300693213939667, "reward_std": 0.11931192874908447, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.02452773228287697, "rewards/format_reward_step_strict": 0.046875, "step": 9 }, { "aux_brier/lambda": 0.09999999999999999, "aux_brier/loss": 1.40633008038904e-06, "aux_brier/mean_group_std": 0.08625601403947647, "aux_brier/mean_r": 0.41661931629281, "aux_brier/n_active_tok": 32.0, "aux_brier/n_groups": 5.2, "aux_brier/n_step_records": 8.0, "calib/answer_extract_rate": 0.0625, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.9378571428571427, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/mean_conf": 0.9378571428571431, "calib/mu_c": NaN, "calib/mu_w": 0.9378571428571431, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.9378571428571427, "calib/std_conf": 0.0655938337072027, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 656.359375, "completions/mean_terminated_length": 706.0000610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.016049616038799286, "learning_rate": 2.5e-06, "loss": 0.0226, "num_tokens": 2768442.0, "reward": 0.01897294819355011, "reward_std": 0.040863893926143646, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0055792974308133125, "rewards/format_reward_step_strict": 0.03515625, "step": 10 }, { "aux_brier/lambda": 0.10000000000000003, "aux_brier/loss": -1.3934533342011926e-08, "aux_brier/mean_group_std": 0.07243567145754536, "aux_brier/mean_r": 0.4100341393936979, "aux_brier/n_active_tok": 36.166666666666664, "aux_brier/n_groups": 5.75, "aux_brier/n_step_records": 9.041666666666666, "calib/answer_extract_rate": 0.17578125, "calib/auroc": 0.8355555555555555, "calib/avg_num_step_conf": 0.85546875, "calib/ece": 0.6752941176470588, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.1171875, "calib/frac_conf_gt_0.9": 0.8529411764705882, "calib/gap": 0.06195555555555554, "calib/mean_conf": 0.9400000000000001, "calib/mu_c": 0.9855555555555556, "calib/mu_w": 0.9236000000000001, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.22265625, "calib/nonempty_step_conf_rate": 0.17578125, "calib/pce": 0.6752941176470588, "calib/std_conf": 0.09032620621620152, "calib/step_conf_rate": 0.17578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 677.8359375, "completions/mean_terminated_length": 735.2796630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.011733333333333333, "grad_norm": 0.7488620281219482, "learning_rate": 2.7500000000000004e-06, "loss": 0.0113, "num_tokens": 3046448.0, "reward": 0.1034020483493805, "reward_std": 0.13478590548038483, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.04642070084810257, "rewards/format_reward_step_strict": 0.11328125, "step": 11 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": 6.589414259741556e-08, "aux_brier/mean_group_std": 0.05026715882137679, "aux_brier/mean_r": 0.5114636334870325, "aux_brier/n_active_tok": 35.23809523809524, "aux_brier/n_groups": 6.285714285714286, "aux_brier/n_step_records": 8.80952380952381, "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.6019736842105263, "calib/avg_num_step_conf": 0.73828125, "calib/ece": 0.6296296296296297, "calib/final_conf_rate": 0.10546875, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.8518518518518519, "calib/gap": -0.01552631578947361, "calib/mean_conf": 0.9259259259259258, "calib/mu_c": 0.915, "calib/mu_w": 0.9305263157894736, "calib/nonempty_final_conf_rate": 0.10546875, "calib/nonempty_reasoning_rate": 0.1953125, "calib/nonempty_step_conf_rate": 0.1484375, "calib/pce": 0.6296296296296297, "calib/std_conf": 0.11873591173378746, "calib/step_conf_rate": 0.1484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 593.83984375, "completions/mean_terminated_length": 652.459228515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.6210219860076904, "learning_rate": 3e-06, "loss": 0.0426, "num_tokens": 3302647.0, "reward": 0.08681464195251465, "reward_std": 0.14767587184906006, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.03475859388709068, "rewards/format_reward_step_strict": 0.09375, "step": 12 }, { "aux_brier/lambda": 0.10000000000000002, "aux_brier/loss": 9.382572043747928e-08, "aux_brier/mean_group_std": 0.04230635742120345, "aux_brier/mean_r": 0.41870340991872157, "aux_brier/n_active_tok": 35.4, "aux_brier/n_groups": 6.4, "aux_brier/n_step_records": 8.85, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.5178571428571429, "calib/avg_num_step_conf": 0.69140625, "calib/ece": 0.7665625, "calib/final_conf_rate": 0.125, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.0724999999999999, "calib/mean_conf": 0.8915625, "calib/mu_c": 0.955, "calib/mu_w": 0.8825000000000001, "calib/nonempty_final_conf_rate": 0.125, "calib/nonempty_reasoning_rate": 0.17578125, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.7665625, "calib/std_conf": 0.23365451759756328, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 632.59765625, "completions/mean_terminated_length": 686.2076416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.1094781756401062, "learning_rate": 3.2500000000000002e-06, "loss": 0.05, "num_tokens": 3569184.0, "reward": 0.06870673596858978, "reward_std": 0.1556394249200821, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.024826953187584877, "rewards/format_reward_step_strict": 0.09375, "step": 13 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.6569791104339695e-08, "aux_brier/mean_group_std": 0.06792149563949823, "aux_brier/mean_r": 0.4946154840274279, "aux_brier/n_active_tok": 30.344827586206897, "aux_brier/n_groups": 5.0, "aux_brier/n_step_records": 7.586206896551724, "calib/answer_extract_rate": 0.23046875, "calib/auroc": 0.46756756756756757, "calib/avg_num_step_conf": 0.8671875, "calib/ece": 0.646595744680851, "calib/final_conf_rate": 0.18359375, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.574468085106383, "calib/gap": 0.04943243243243245, "calib/mean_conf": 0.8380851063829787, "calib/mu_c": 0.877, "calib/mu_w": 0.8275675675675676, "calib/nonempty_final_conf_rate": 0.18359375, "calib/nonempty_reasoning_rate": 0.25390625, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.6359574468085106, "calib/std_conf": 0.25984969712764555, "calib/step_conf_rate": 0.1953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2976.0, "completions/max_terminated_length": 2976.0, "completions/mean_length": 694.9609375, "completions/mean_terminated_length": 726.1632080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.08381670713424683, "learning_rate": 3.5e-06, "loss": 0.0793, "num_tokens": 3852494.0, "reward": 0.13864755630493164, "reward_std": 0.23478180170059204, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.06240273267030716, "rewards/format_reward_step_strict": 0.16015625, "step": 14 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.5840396259049243e-08, "aux_brier/mean_group_std": 0.10441008311054162, "aux_brier/mean_r": 0.44875280923062527, "aux_brier/n_active_tok": 38.3448275862069, "aux_brier/n_groups": 6.344827586206897, "aux_brier/n_step_records": 9.586206896551724, "calib/answer_extract_rate": 0.234375, "calib/auroc": 0.39, "calib/avg_num_step_conf": 1.1171875, "calib/ece": 0.5867777777777777, "calib/final_conf_rate": 0.17578125, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.016833333333333145, "calib/mean_conf": 0.920111111111111, "calib/mu_c": 0.9313333333333332, "calib/mu_w": 0.9145000000000001, "calib/nonempty_final_conf_rate": 0.17578125, "calib/nonempty_reasoning_rate": 0.28515625, "calib/nonempty_step_conf_rate": 0.234375, "calib/pce": 0.5867777777777777, "calib/std_conf": 0.14971264657072184, "calib/step_conf_rate": 0.234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 568.4140625, "completions/mean_terminated_length": 627.2155151367188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.012408778071403503, "learning_rate": 3.7500000000000005e-06, "loss": 0.0791, "num_tokens": 4105888.0, "reward": 0.15055759251117706, "reward_std": 0.2482212483882904, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.07098037004470825, "rewards/format_reward_step_strict": 0.1484375, "step": 15 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.537876257327266e-08, "aux_brier/mean_group_std": 0.09054687516205945, "aux_brier/mean_r": 0.5068588152499831, "aux_brier/n_active_tok": 37.6551724137931, "aux_brier/n_groups": 5.793103448275862, "aux_brier/n_step_records": 9.413793103448276, "calib/answer_extract_rate": 0.21484375, "calib/auroc": 0.582175925925926, "calib/avg_num_step_conf": 1.0859375, "calib/ece": 0.6368749999999999, "calib/final_conf_rate": 0.1875, "calib/format_rate": 0.1640625, "calib/frac_conf_gt_0.9": 0.7291666666666666, "calib/gap": 0.06750000000000023, "calib/mean_conf": 0.8868749999999999, "calib/mu_c": 0.9375000000000001, "calib/mu_w": 0.8699999999999999, "calib/nonempty_final_conf_rate": 0.1875, "calib/nonempty_reasoning_rate": 0.2734375, "calib/nonempty_step_conf_rate": 0.23046875, "calib/pce": 0.6368749999999999, "calib/std_conf": 0.19325238517286145, "calib/step_conf_rate": 0.23046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3060.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 659.6953125, "completions/mean_terminated_length": 712.5822143554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.37085506319999695, "learning_rate": 4.000000000000001e-06, "loss": 0.0919, "num_tokens": 4383618.0, "reward": 0.1381245106458664, "reward_std": 0.26648402214050293, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.06031055003404617, "rewards/format_reward_step_strict": 0.15234375, "step": 16 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.2811494829278627e-08, "aux_brier/mean_group_std": 0.10850807264933728, "aux_brier/mean_r": 0.4261221348054745, "aux_brier/n_active_tok": 50.32258064516129, "aux_brier/n_groups": 7.741935483870968, "aux_brier/n_step_records": 12.580645161290322, "calib/answer_extract_rate": 0.296875, "calib/auroc": 0.5654761904761905, "calib/avg_num_step_conf": 1.55859375, "calib/ece": 0.6929032258064516, "calib/final_conf_rate": 0.2421875, "calib/format_rate": 0.20703125, "calib/frac_conf_gt_0.9": 0.7580645161290323, "calib/gap": 0.04687500000000011, "calib/mean_conf": 0.9187096774193547, "calib/mu_c": 0.9550000000000001, "calib/mu_w": 0.908125, "calib/nonempty_final_conf_rate": 0.2421875, "calib/nonempty_reasoning_rate": 0.359375, "calib/nonempty_step_conf_rate": 0.28515625, "calib/pce": 0.6929032258064516, "calib/std_conf": 0.1391040904174413, "calib/step_conf_rate": 0.28515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2709.0, "completions/max_terminated_length": 2709.0, "completions/mean_length": 511.2578125, "completions/mean_terminated_length": 545.3416748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.018133333333333335, "grad_norm": 0.1399787813425064, "learning_rate": 4.25e-06, "loss": 0.083, "num_tokens": 4618028.0, "reward": 0.1719304621219635, "reward_std": 0.2832571268081665, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.07053437829017639, "rewards/format_reward_step_strict": 0.19921875, "step": 17 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.0705595424968333e-08, "aux_brier/mean_group_std": 0.10074239404875009, "aux_brier/mean_r": 0.4890022355941549, "aux_brier/n_active_tok": 39.06666666666667, "aux_brier/n_groups": 5.566666666666666, "aux_brier/n_step_records": 9.766666666666667, "calib/answer_extract_rate": 0.26953125, "calib/auroc": 0.5829787234042552, "calib/avg_num_step_conf": 1.15234375, "calib/ece": 0.6591935483870968, "calib/final_conf_rate": 0.2421875, "calib/format_rate": 0.21875, "calib/frac_conf_gt_0.9": 0.7580645161290323, "calib/gap": 0.03896453900709218, "calib/mean_conf": 0.9011290322580646, "calib/mu_c": 0.9306666666666668, "calib/mu_w": 0.8917021276595746, "calib/nonempty_final_conf_rate": 0.2421875, "calib/nonempty_reasoning_rate": 0.296875, "calib/nonempty_step_conf_rate": 0.25, "calib/pce": 0.6591935483870968, "calib/std_conf": 0.17225360934192796, "calib/step_conf_rate": 0.25, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 642.140625, "completions/mean_terminated_length": 670.9713745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0192, "grad_norm": 0.41152575612068176, "learning_rate": 4.5e-06, "loss": 0.1446, "num_tokens": 4893136.0, "reward": 0.1874864101409912, "reward_std": 0.30030369758605957, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.08588320016860962, "rewards/format_reward_step_strict": 0.21484375, "step": 18 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.464701352901001e-08, "aux_brier/mean_group_std": 0.16125124480125577, "aux_brier/mean_r": 0.5427735700486835, "aux_brier/n_active_tok": 73.0, "aux_brier/n_groups": 6.96875, "aux_brier/n_step_records": 18.25, "calib/answer_extract_rate": 0.52734375, "calib/auroc": 0.3931818181818182, "calib/avg_num_step_conf": 2.37890625, "calib/ece": 0.7021097345132743, "calib/final_conf_rate": 0.44140625, "calib/format_rate": 0.3671875, "calib/frac_conf_gt_0.9": 0.6637168141592921, "calib/gap": -0.11874327272727303, "calib/mean_conf": 0.8644566371681416, "calib/mu_c": 0.7719839999999999, "calib/mu_w": 0.8907272727272729, "calib/nonempty_final_conf_rate": 0.44140625, "calib/nonempty_reasoning_rate": 0.609375, "calib/nonempty_step_conf_rate": 0.48046875, "calib/pce": 0.6726637168141594, "calib/std_conf": 0.21202785961379741, "calib/step_conf_rate": 0.48046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2379.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 440.4296875, "completions/mean_terminated_length": 454.6370849609375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.020266666666666665, "grad_norm": 0.08732657134532928, "learning_rate": 4.75e-06, "loss": 0.0775, "num_tokens": 5110646.0, "reward": 0.2961248755455017, "reward_std": 0.3688579201698303, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.12199944257736206, "rewards/format_reward_step_strict": 0.328125, "step": 19 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.4677526945278174e-09, "aux_brier/mean_group_std": 0.22795472360291566, "aux_brier/mean_r": 0.551406513573312, "aux_brier/n_active_tok": 88.125, "aux_brier/n_groups": 6.90625, "aux_brier/n_step_records": 22.03125, "calib/answer_extract_rate": 0.671875, "calib/auroc": 0.5786764705882353, "calib/avg_num_step_conf": 2.82421875, "calib/ece": 0.6226138364779874, "calib/final_conf_rate": 0.62109375, "calib/format_rate": 0.515625, "calib/frac_conf_gt_0.9": 0.6981132075471698, "calib/gap": 0.05920924369747904, "calib/mean_conf": 0.8741861635220127, "calib/mu_c": 0.9185000000000001, "calib/mu_w": 0.859290756302521, "calib/nonempty_final_conf_rate": 0.62109375, "calib/nonempty_reasoning_rate": 0.7265625, "calib/nonempty_step_conf_rate": 0.6171875, "calib/pce": 0.6226138364779874, "calib/std_conf": 0.201787693548619, "calib/step_conf_rate": 0.6171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 370.1953125, "completions/mean_terminated_length": 385.243896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.021333333333333333, "grad_norm": 0.04635249078273773, "learning_rate": 5e-06, "loss": 0.134, "num_tokens": 5310288.0, "reward": 0.4592316746711731, "reward_std": 0.46332481503486633, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.21192675828933716, "rewards/format_reward_step_strict": 0.4921875, "step": 20 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8800755660630664e-08, "aux_brier/mean_group_std": 0.2036430602918916, "aux_brier/mean_r": 0.4884979263540808, "aux_brier/n_active_tok": 108.125, "aux_brier/n_groups": 7.9375, "aux_brier/n_step_records": 27.03125, "calib/answer_extract_rate": 0.765625, "calib/auroc": 0.47993197278911565, "calib/avg_num_step_conf": 3.4453125, "calib/ece": 0.6724153297682708, "calib/final_conf_rate": 0.73046875, "calib/format_rate": 0.65234375, "calib/frac_conf_gt_0.9": 0.6631016042780749, "calib/gap": 0.003720521541950017, "calib/mean_conf": 0.8628253119429589, "calib/mu_c": 0.8657499999999999, "calib/mu_w": 0.8620294784580499, "calib/nonempty_final_conf_rate": 0.73046875, "calib/nonempty_reasoning_rate": 0.8671875, "calib/nonempty_step_conf_rate": 0.77734375, "calib/pce": 0.6606684491978608, "calib/std_conf": 0.2257502058947101, "calib/step_conf_rate": 0.77734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 350.2421875, "completions/mean_terminated_length": 354.395263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.0224, "grad_norm": 0.024518921971321106, "learning_rate": 4.9722222222222224e-06, "loss": 0.1386, "num_tokens": 5502910.0, "reward": 0.5317217111587524, "reward_std": 0.463791161775589, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.2440744787454605, "rewards/format_reward_step_strict": 0.62109375, "step": 21 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.4348118521366757e-08, "aux_brier/mean_group_std": 0.21018110046635474, "aux_brier/mean_r": 0.5201762509289959, "aux_brier/n_active_tok": 118.375, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 29.59375, "calib/answer_extract_rate": 0.79296875, "calib/auroc": 0.6036585365853657, "calib/avg_num_step_conf": 3.77734375, "calib/ece": 0.676582233502538, "calib/final_conf_rate": 0.76953125, "calib/format_rate": 0.6953125, "calib/frac_conf_gt_0.9": 0.7055837563451777, "calib/gap": 0.05874066604127581, "calib/mean_conf": 0.8847040609137056, "calib/mu_c": 0.9312195121951219, "calib/mu_w": 0.8724788461538461, "calib/nonempty_final_conf_rate": 0.76953125, "calib/nonempty_reasoning_rate": 0.86328125, "calib/nonempty_step_conf_rate": 0.796875, "calib/pce": 0.676582233502538, "calib/std_conf": 0.18038984344476477, "calib/step_conf_rate": 0.796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 349.08984375, "completions/mean_terminated_length": 353.229248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.023466666666666667, "grad_norm": 0.24271811544895172, "learning_rate": 4.944444444444445e-06, "loss": 0.1173, "num_tokens": 5694093.0, "reward": 0.5552133321762085, "reward_std": 0.5039442181587219, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.25991591811180115, "rewards/format_reward_step_strict": 0.65234375, "step": 22 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.517676952119913e-09, "aux_brier/mean_group_std": 0.20458716808645896, "aux_brier/mean_r": 0.46074778540779154, "aux_brier/n_active_tok": 119.875, "aux_brier/n_groups": 8.0625, "aux_brier/n_step_records": 29.96875, "calib/answer_extract_rate": 0.8046875, "calib/auroc": 0.4563636363636364, "calib/avg_num_step_conf": 3.7734375, "calib/ece": 0.6743137254901962, "calib/final_conf_rate": 0.796875, "calib/format_rate": 0.71484375, "calib/frac_conf_gt_0.9": 0.7647058823529411, "calib/gap": -0.01882597402597408, "calib/mean_conf": 0.8994117647058825, "calib/mu_c": 0.8852, "calib/mu_w": 0.9040259740259741, "calib/nonempty_final_conf_rate": 0.796875, "calib/nonempty_reasoning_rate": 0.8984375, "calib/nonempty_step_conf_rate": 0.8515625, "calib/pce": 0.6643137254901962, "calib/std_conf": 0.16784285092517265, "calib/step_conf_rate": 0.8515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 327.28125, "completions/mean_terminated_length": 327.28125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.024533333333333334, "grad_norm": 0.030861619859933853, "learning_rate": 4.9166666666666665e-06, "loss": 0.1793, "num_tokens": 5881813.0, "reward": 0.6187982559204102, "reward_std": 0.5070070624351501, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.24863046407699585, "rewards/format_reward_step_strict": 0.70703125, "step": 23 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1131297938948848e-08, "aux_brier/mean_group_std": 0.1958564425362323, "aux_brier/mean_r": 0.5294594716014864, "aux_brier/n_active_tok": 136.375, "aux_brier/n_groups": 8.90625, "aux_brier/n_step_records": 34.09375, "calib/answer_extract_rate": 0.85546875, "calib/auroc": 0.5885695187165776, "calib/avg_num_step_conf": 4.43359375, "calib/ece": 0.6848598130841121, "calib/final_conf_rate": 0.8359375, "calib/format_rate": 0.78515625, "calib/frac_conf_gt_0.9": 0.719626168224299, "calib/gap": 0.04093048128342236, "calib/mean_conf": 0.8811214953271028, "calib/mu_c": 0.9136363636363636, "calib/mu_w": 0.8727058823529412, "calib/nonempty_final_conf_rate": 0.8359375, "calib/nonempty_reasoning_rate": 0.921875, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.6801869158878504, "calib/std_conf": 0.1940051406140566, "calib/step_conf_rate": 0.87890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2692.0, "completions/max_terminated_length": 2692.0, "completions/mean_length": 303.0078125, "completions/mean_terminated_length": 306.6007995605469, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0256, "grad_norm": 0.3642455041408539, "learning_rate": 4.888888888888889e-06, "loss": 0.0942, "num_tokens": 6063895.0, "reward": 0.6258395910263062, "reward_std": 0.45120394229888916, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.2846086025238037, "rewards/format_reward_step_strict": 0.75, "step": 24 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.354167785688203e-08, "aux_brier/mean_group_std": 0.19292708907994796, "aux_brier/mean_r": 0.5279369356274024, "aux_brier/n_active_tok": 140.0, "aux_brier/n_groups": 7.71875, "aux_brier/n_step_records": 35.0, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5089924670433146, "calib/avg_num_step_conf": 4.4453125, "calib/ece": 0.6421603375527426, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.6877637130801688, "calib/gap": 0.012508474576271245, "calib/mean_conf": 0.8953248945147679, "calib/mu_c": 0.9046666666666666, "calib/mu_w": 0.8921581920903954, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.6421603375527426, "calib/std_conf": 0.1481714707320978, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 706.0, "completions/max_terminated_length": 706.0, "completions/mean_length": 245.2578125, "completions/mean_terminated_length": 246.21961975097656, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.02666666666666667, "grad_norm": 0.23752334713935852, "learning_rate": 4.861111111111111e-06, "loss": -0.0143, "num_tokens": 6229905.0, "reward": 0.7569185495376587, "reward_std": 0.44906532764434814, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.3479868769645691, "rewards/format_reward_step_strict": 0.86328125, "step": 25 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.23448233158819e-09, "aux_brier/mean_group_std": 0.1978469486302426, "aux_brier/mean_r": 0.5483203594789697, "aux_brier/n_active_tok": 149.5, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 37.375, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4829678735339113, "calib/avg_num_step_conf": 4.71875, "calib/ece": 0.6882310924369749, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.7058823529411765, "calib/gap": -0.023430392656807686, "calib/mean_conf": 0.8944579831932773, "calib/mu_c": 0.876245283018868, "calib/mu_w": 0.8996756756756756, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.68, "calib/std_conf": 0.16985796191578595, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1587.0, "completions/max_terminated_length": 1587.0, "completions/mean_length": 271.40625, "completions/mean_terminated_length": 272.4706115722656, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.027733333333333332, "grad_norm": 0.04942494258284569, "learning_rate": 4.833333333333333e-06, "loss": 0.0014, "num_tokens": 6404625.0, "reward": 0.7099546194076538, "reward_std": 0.4411212205886841, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.29294368624687195, "rewards/format_reward_step_strict": 0.84375, "step": 26 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 8.965374892130895e-09, "aux_brier/mean_group_std": 0.1958536275388754, "aux_brier/mean_r": 0.5246195927248277, "aux_brier/n_active_tok": 140.0, "aux_brier/n_groups": 7.65625, "aux_brier/n_step_records": 35.0, "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.45703125, "calib/ece": 0.7244303797468354, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.6877637130801688, "calib/gap": 0.01561774461028198, "calib/mean_conf": 0.8681434599156116, "calib/mu_c": 0.8813888888888889, "calib/mu_w": 0.8657711442786069, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.7203375527426161, "calib/std_conf": 0.21613973369684655, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2481.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 256.06640625, "completions/mean_terminated_length": 256.06640625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.0288, "grad_norm": 0.26420357823371887, "learning_rate": 4.805555555555556e-06, "loss": 0.0394, "num_tokens": 6575394.0, "reward": 0.6430661082267761, "reward_std": 0.4162224531173706, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.2832019329071045, "rewards/format_reward_step_strict": 0.85546875, "step": 27 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.8132602324093536e-08, "aux_brier/mean_group_std": 0.2016200024171586, "aux_brier/mean_r": 0.5291456947085008, "aux_brier/n_active_tok": 141.25, "aux_brier/n_groups": 8.0625, "aux_brier/n_step_records": 35.3125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49616840966462317, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.6449392712550608, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.6275303643724697, "calib/gap": 0.007981428056256568, "calib/mean_conf": 0.8757894736842105, "calib/mu_c": 0.8818644067796609, "calib/mu_w": 0.8738829787234044, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.640931174089069, "calib/std_conf": 0.18055045222908706, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2386.0, "completions/max_terminated_length": 2386.0, "completions/mean_length": 249.8125, "completions/mean_terminated_length": 249.8125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.029866666666666666, "grad_norm": 0.07079173624515533, "learning_rate": 4.777777777777778e-06, "loss": 0.0775, "num_tokens": 6746290.0, "reward": 0.7679669857025146, "reward_std": 0.4454382359981537, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.353117972612381, "rewards/format_reward_step_strict": 0.8984375, "step": 28 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.849219413851415e-09, "aux_brier/mean_group_std": 0.2091342825495418, "aux_brier/mean_r": 0.5685604715827565, "aux_brier/n_active_tok": 148.875, "aux_brier/n_groups": 7.9375, "aux_brier/n_step_records": 37.21875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4083418497700562, "calib/avg_num_step_conf": 4.671875, "calib/ece": 0.7211270491803279, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.610655737704918, "calib/gap": -0.027967552376085614, "calib/mean_conf": 0.8657172131147541, "calib/mu_c": 0.8421052631578949, "calib/mu_w": 0.8700728155339805, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.7155532786885246, "calib/std_conf": 0.20098374340613906, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2379.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 268.42578125, "completions/mean_terminated_length": 268.42578125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.030933333333333334, "grad_norm": 0.09916239231824875, "learning_rate": 4.75e-06, "loss": 0.0289, "num_tokens": 6922135.0, "reward": 0.6932663917541504, "reward_std": 0.34607529640197754, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.2965030074119568, "rewards/format_reward_step_strict": 0.92578125, "step": 29 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.016452467645262e-09, "aux_brier/mean_group_std": 0.19812405039959854, "aux_brier/mean_r": 0.5731785546310743, "aux_brier/n_active_tok": 153.25, "aux_brier/n_groups": 9.8125, "aux_brier/n_step_records": 38.3125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.49194444444444446, "calib/avg_num_step_conf": 4.7890625, "calib/ece": 0.6851836734693879, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.5795918367346938, "calib/gap": 0.017733333333333268, "calib/mean_conf": 0.8688571428571429, "calib/mu_c": 0.8833333333333333, "calib/mu_w": 0.8656, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6851836734693879, "calib/std_conf": 0.17370958640260262, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2526.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 263.88671875, "completions/mean_terminated_length": 263.88671875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.032, "grad_norm": 0.4797416925430298, "learning_rate": 4.722222222222222e-06, "loss": 0.0018, "num_tokens": 7096674.0, "reward": 0.7211823463439941, "reward_std": 0.36588066816329956, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.3300417959690094, "rewards/format_reward_step_strict": 0.91796875, "step": 30 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.3833868207100437e-09, "aux_brier/mean_group_std": 0.2112627605462767, "aux_brier/mean_r": 0.5961313745848398, "aux_brier/n_active_tok": 162.0, "aux_brier/n_groups": 8.78125, "aux_brier/n_step_records": 40.5, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4347704991087344, "calib/avg_num_step_conf": 5.08203125, "calib/ece": 0.6572983870967744, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.5443548387096774, "calib/gap": -0.00869429590017834, "calib/mean_conf": 0.8273790322580645, "calib/mu_c": 0.8202272727272728, "calib/mu_w": 0.8289215686274511, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6536290322580646, "calib/std_conf": 0.2199578072715507, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1720.0, "completions/max_terminated_length": 1720.0, "completions/mean_length": 261.76171875, "completions/mean_terminated_length": 261.76171875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.03306666666666667, "grad_norm": 0.6870817542076111, "learning_rate": 4.694444444444445e-06, "loss": -0.0057, "num_tokens": 7269597.0, "reward": 0.73150634765625, "reward_std": 0.3738311529159546, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.363525390625, "rewards/format_reward_step_strict": 0.9375, "step": 31 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.489066333073332e-08, "aux_brier/mean_group_std": 0.21115774034924586, "aux_brier/mean_r": 0.6755294313460579, "aux_brier/n_active_tok": 142.875, "aux_brier/n_groups": 7.65625, "aux_brier/n_step_records": 35.71875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.46529634581105167, "calib/avg_num_step_conf": 4.4921875, "calib/ece": 0.6478629032258065, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.4435483870967742, "calib/gap": -0.013204099821747084, "calib/mean_conf": 0.8035887096774195, "calib/mu_c": 0.7927272727272726, "calib/mu_w": 0.8059313725490197, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.6370161290322581, "calib/std_conf": 0.21958452381334861, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1562.0, "completions/max_terminated_length": 1562.0, "completions/mean_length": 240.34375, "completions/mean_terminated_length": 240.34375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.034133333333333335, "grad_norm": 0.0818771943449974, "learning_rate": 4.666666666666667e-06, "loss": 0.0605, "num_tokens": 7437829.0, "reward": 0.7398393154144287, "reward_std": 0.3222612142562866, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.38904494047164917, "rewards/format_reward_step_strict": 0.93359375, "step": 32 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1705792274673286e-08, "aux_brier/mean_group_std": 0.18984168847228203, "aux_brier/mean_r": 0.6854015350212486, "aux_brier/n_active_tok": 148.75, "aux_brier/n_groups": 8.125, "aux_brier/n_step_records": 37.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5320335863814124, "calib/avg_num_step_conf": 4.6796875, "calib/ece": 0.6018795180722891, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.3815261044176707, "calib/gap": 0.03542374051069719, "calib/mean_conf": 0.7703132530120481, "calib/mu_c": 0.7997619047619047, "calib/mu_w": 0.7643381642512075, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.6017590361445784, "calib/std_conf": 0.23741732499946133, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1270.0, "completions/max_terminated_length": 1270.0, "completions/mean_length": 264.72265625, "completions/mean_terminated_length": 265.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.0352, "grad_norm": 1.760973334312439, "learning_rate": 4.638888888888889e-06, "loss": -0.0303, "num_tokens": 7612470.0, "reward": 0.7472243309020996, "reward_std": 0.33903104066848755, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.4263971745967865, "rewards/format_reward_step_strict": 0.9375, "step": 33 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.220304423123821e-08, "aux_brier/mean_group_std": 0.17374843804909995, "aux_brier/mean_r": 0.7320428933024568, "aux_brier/n_active_tok": 139.125, "aux_brier/n_groups": 7.3125, "aux_brier/n_step_records": 34.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5317150297619047, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.5133064516129033, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.31048387096774194, "calib/gap": 0.021443452380952244, "calib/mean_conf": 0.7316129032258064, "calib/mu_c": 0.7482142857142857, "calib/mu_w": 0.7267708333333335, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5095564516129033, "calib/std_conf": 0.2545644195571614, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2056.0, "completions/max_terminated_length": 2056.0, "completions/mean_length": 237.6171875, "completions/mean_terminated_length": 237.6171875, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.03626666666666667, "grad_norm": 0.17196181416511536, "learning_rate": 4.611111111111112e-06, "loss": 0.031, "num_tokens": 7778412.0, "reward": 0.8209953308105469, "reward_std": 0.3929782509803772, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.48710623383522034, "rewards/format_reward_step_strict": 0.953125, "step": 34 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.531298018941918e-08, "aux_brier/mean_group_std": 0.16789939570729087, "aux_brier/mean_r": 0.7882815575863105, "aux_brier/n_active_tok": 151.125, "aux_brier/n_groups": 8.53125, "aux_brier/n_step_records": 37.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5594684385382059, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.5158300395256918, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.2766798418972332, "calib/gap": 0.059986157253599015, "calib/mean_conf": 0.685790513833992, "calib/mu_c": 0.7355813953488372, "calib/mu_w": 0.6755952380952381, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5158300395256918, "calib/std_conf": 0.2782372654379569, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 681.0, "completions/max_terminated_length": 681.0, "completions/mean_length": 244.8671875, "completions/mean_terminated_length": 245.8274688720703, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.037333333333333336, "grad_norm": 0.07762862741947174, "learning_rate": 4.583333333333333e-06, "loss": 0.0018, "num_tokens": 7950354.0, "reward": 0.7836238145828247, "reward_std": 0.34538066387176514, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.517307698726654, "rewards/format_reward_step_strict": 0.96484375, "step": 35 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.978279810081588e-08, "aux_brier/mean_group_std": 0.14886058145328834, "aux_brier/mean_r": 0.8125586780805625, "aux_brier/n_active_tok": 148.625, "aux_brier/n_groups": 8.1875, "aux_brier/n_step_records": 37.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.45760743321718933, "calib/avg_num_step_conf": 4.703125, "calib/ece": 0.35986000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.172, "calib/gap": -0.03168699186991886, "calib/mean_conf": 0.63794, "calib/mu_c": 0.6166463414634146, "calib/mu_w": 0.6483333333333334, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33490000000000003, "calib/std_conf": 0.2565218439041791, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 752.0, "completions/max_terminated_length": 752.0, "completions/mean_length": 233.54296875, "completions/mean_terminated_length": 234.45883178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.0384, "grad_norm": 0.6392661333084106, "learning_rate": 4.555555555555556e-06, "loss": -0.0299, "num_tokens": 8112853.0, "reward": 0.9495212435722351, "reward_std": 0.4306148886680603, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.58714759349823, "rewards/format_reward_step_strict": 0.95703125, "step": 36 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.859237547396368e-08, "aux_brier/mean_group_std": 0.1333417137353705, "aux_brier/mean_r": 0.8526910328969939, "aux_brier/n_active_tok": 151.375, "aux_brier/n_groups": 8.53125, "aux_brier/n_step_records": 37.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5208987783595114, "calib/avg_num_step_conf": 4.76953125, "calib/ece": 0.3800996015936255, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.14741035856573706, "calib/gap": 0.019286649214659723, "calib/mean_conf": 0.5515737051792828, "calib/mu_c": 0.56625, "calib/mu_w": 0.5469633507853403, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3463147410358566, "calib/std_conf": 0.295522384308376, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 796.0, "completions/max_terminated_length": 796.0, "completions/mean_length": 245.25390625, "completions/mean_terminated_length": 246.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.039466666666666664, "grad_norm": 0.16471506655216217, "learning_rate": 4.527777777777778e-06, "loss": 0.0028, "num_tokens": 8282734.0, "reward": 0.8762906789779663, "reward_std": 0.36260733008384705, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.62235027551651, "rewards/format_reward_step_strict": 0.96484375, "step": 37 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.332215516016479e-08, "aux_brier/mean_group_std": 0.09968430742331046, "aux_brier/mean_r": 0.8990072955013308, "aux_brier/n_active_tok": 154.125, "aux_brier/n_groups": 9.21875, "aux_brier/n_step_records": 38.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4822916666666666, "calib/avg_num_step_conf": 4.81640625, "calib/ece": 0.3063888888888889, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.12301587301587301, "calib/gap": -0.02189583333333328, "calib/mean_conf": 0.5038492063492064, "calib/mu_c": 0.4871666666666667, "calib/mu_w": 0.5090625, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28607142857142853, "calib/std_conf": 0.2990710901170733, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2226.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 262.5234375, "completions/mean_terminated_length": 262.5234375, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.04053333333333333, "grad_norm": 0.07935165613889694, "learning_rate": 4.5e-06, "loss": 0.0893, "num_tokens": 8456828.0, "reward": 0.8824505805969238, "reward_std": 0.330085813999176, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.6391773819923401, "rewards/format_reward_step_strict": 0.9765625, "step": 38 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1815635095335786e-08, "aux_brier/mean_group_std": 0.09178553734832681, "aux_brier/mean_r": 0.907417051799585, "aux_brier/n_active_tok": 156.125, "aux_brier/n_groups": 8.75, "aux_brier/n_step_records": 39.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5073298429319373, "calib/avg_num_step_conf": 4.890625, "calib/ece": 0.25776892430278886, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.055776892430278883, "calib/gap": 0.0013132635253054747, "calib/mean_conf": 0.4381673306772908, "calib/mu_c": 0.4391666666666667, "calib/mu_w": 0.43785340314136123, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.22844621513944224, "calib/std_conf": 0.2740238522501975, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 266.1171875, "completions/mean_terminated_length": 266.1171875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.0416, "grad_norm": 0.33297401666641235, "learning_rate": 4.472222222222223e-06, "loss": 0.0174, "num_tokens": 8631042.0, "reward": 0.8796318173408508, "reward_std": 0.29891446232795715, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.6747773885726929, "rewards/format_reward_step_strict": 0.953125, "step": 39 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.216957964735645e-07, "aux_brier/mean_group_std": 0.07120818175989829, "aux_brier/mean_r": 0.9208270935628, "aux_brier/n_active_tok": 178.25, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 44.5625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5219904389395915, "calib/avg_num_step_conf": 5.58203125, "calib/ece": 0.23673228346456693, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": 0.008357235984354616, "calib/mean_conf": 0.40578740157480314, "calib/mu_c": 0.41220338983050847, "calib/mu_w": 0.40384615384615385, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20511811023622045, "calib/std_conf": 0.2758422500776374, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2127.0, "completions/max_terminated_length": 2127.0, "completions/mean_length": 291.62109375, "completions/mean_terminated_length": 291.62109375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.042666666666666665, "grad_norm": 0.1428987979888916, "learning_rate": 4.444444444444444e-06, "loss": 0.0219, "num_tokens": 8812457.0, "reward": 0.9007915258407593, "reward_std": 0.303219199180603, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7125409841537476, "rewards/format_reward_step_strict": 0.9765625, "step": 40 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.718792642743885e-08, "aux_brier/mean_group_std": 0.08801166791896568, "aux_brier/mean_r": 0.9106452685143135, "aux_brier/n_active_tok": 167.125, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 41.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5757065319109115, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.18740157480314962, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03937007874015748, "calib/gap": 0.06223469960696243, "calib/mean_conf": 0.35874015748031496, "calib/mu_c": 0.39230769230769236, "calib/mu_w": 0.33007299270072993, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04275590551181101, "calib/std_conf": 0.25022507073667105, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 919.0, "completions/max_terminated_length": 919.0, "completions/mean_length": 271.484375, "completions/mean_terminated_length": 272.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.04373333333333333, "grad_norm": 0.3115524649620056, "learning_rate": 4.416666666666667e-06, "loss": 0.0019, "num_tokens": 8989205.0, "reward": 1.1249455213546753, "reward_std": 0.3867625296115875, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6950949430465698, "rewards/format_reward_step_strict": 0.98046875, "step": 41 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.563943969358107e-07, "aux_brier/mean_group_std": 0.078453620524218, "aux_brier/mean_r": 0.9349983468055203, "aux_brier/n_active_tok": 154.75, "aux_brier/n_groups": 8.15625, "aux_brier/n_step_records": 38.6875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6110141131965663, "calib/avg_num_step_conf": 4.90234375, "calib/ece": 0.1409881422924901, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.08844245598719624, "calib/mean_conf": 0.2941106719367589, "calib/mu_c": 0.3549367088607595, "calib/mu_w": 0.26649425287356326, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.061422924901185755, "calib/std_conf": 0.23719811186740408, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 560.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 234.99609375, "completions/mean_terminated_length": 235.91766357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.0448, "grad_norm": 0.0935867577791214, "learning_rate": 4.388888888888889e-06, "loss": -0.0048, "num_tokens": 9153732.0, "reward": 0.9800031185150146, "reward_std": 0.3009245991706848, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.7559499740600586, "rewards/format_reward_step_strict": 0.96484375, "step": 42 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.9479223564711212e-07, "aux_brier/mean_group_std": 0.06715368757473159, "aux_brier/mean_r": 0.9352575832435351, "aux_brier/n_active_tok": 175.75, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 43.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5921327547412407, "calib/avg_num_step_conf": 5.609375, "calib/ece": 0.12836653386454183, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.06621423979427832, "calib/mean_conf": 0.28804780876494024, "calib/mu_c": 0.3363235294117647, "calib/mu_w": 0.27010928961748637, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07274900398406374, "calib/std_conf": 0.23657873049429048, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 299.45703125, "completions/mean_terminated_length": 300.6313781738281, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.04586666666666667, "grad_norm": 0.37313738465309143, "learning_rate": 4.361111111111112e-06, "loss": 0.0321, "num_tokens": 9335617.0, "reward": 0.9451814889907837, "reward_std": 0.29758742451667786, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7572882771492004, "rewards/format_reward_step_strict": 0.97265625, "step": 43 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.963976862835338e-07, "aux_brier/mean_group_std": 0.07284111904645876, "aux_brier/mean_r": 0.939259584647508, "aux_brier/n_active_tok": 179.375, "aux_brier/n_groups": 10.1875, "aux_brier/n_step_records": 44.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5067740901074235, "calib/avg_num_step_conf": 5.6171875, "calib/ece": 0.2019607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": -0.005894660894660841, "calib/mean_conf": 0.2748235294117647, "calib/mu_c": 0.2704545454545455, "calib/mu_w": 0.27634920634920634, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10898039215686275, "calib/std_conf": 0.24381901885655005, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 855.0, "completions/max_terminated_length": 855.0, "completions/mean_length": 309.26171875, "completions/mean_terminated_length": 310.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.046933333333333334, "grad_norm": 0.22018788754940033, "learning_rate": 4.333333333333334e-06, "loss": 0.0311, "num_tokens": 9521108.0, "reward": 0.9387511610984802, "reward_std": 0.23437447845935822, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7393797039985657, "rewards/format_reward_step_strict": 0.9921875, "step": 44 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.7874278649142603e-07, "aux_brier/mean_group_std": 0.07398399706363962, "aux_brier/mean_r": 0.9395140010316367, "aux_brier/n_active_tok": 180.875, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 45.21875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5308208366219416, "calib/avg_num_step_conf": 5.765625, "calib/ece": 0.188804780876494, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.0098382004735596, "calib/mean_conf": 0.22219123505976093, "calib/mu_c": 0.2292857142857143, "calib/mu_w": 0.2194475138121547, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06605577689243029, "calib/std_conf": 0.21994652393095757, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 946.0, "completions/max_terminated_length": 946.0, "completions/mean_length": 302.66796875, "completions/mean_terminated_length": 303.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.048, "grad_norm": 0.12611983716487885, "learning_rate": 4.305555555555556e-06, "loss": -0.0078, "num_tokens": 9703639.0, "reward": 0.9419686794281006, "reward_std": 0.3299618065357208, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7288120985031128, "rewards/format_reward_step_strict": 0.95703125, "step": 45 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4474342063808265e-07, "aux_brier/mean_group_std": 0.06889973066903073, "aux_brier/mean_r": 0.9353062366776169, "aux_brier/n_active_tok": 200.875, "aux_brier/n_groups": 12.78125, "aux_brier/n_step_records": 50.21875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.512075807334428, "calib/avg_num_step_conf": 6.27734375, "calib/ece": 0.2322352941176471, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.01679597701149424, "calib/mean_conf": 0.20698039215686276, "calib/mu_c": 0.21804597701149425, "calib/mu_w": 0.20125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.049019607843137275, "calib/std_conf": 0.21204674754625277, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2091.0, "completions/max_terminated_length": 2091.0, "completions/mean_length": 337.0390625, "completions/mean_terminated_length": 337.0390625, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.04906666666666667, "grad_norm": 0.09090101718902588, "learning_rate": 4.277777777777778e-06, "loss": 0.04, "num_tokens": 9894689.0, "reward": 1.0143649578094482, "reward_std": 0.27545514702796936, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.7137101888656616, "rewards/format_reward_step_strict": 0.9921875, "step": 46 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.303458840508426e-07, "aux_brier/mean_group_std": 0.05258738284857578, "aux_brier/mean_r": 0.9464171112388273, "aux_brier/n_active_tok": 210.625, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 52.65625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4995444350995234, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.25390438247011954, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.026028174936921777, "calib/mean_conf": 0.17286852589641435, "calib/mu_c": 0.15586206896551724, "calib/mu_w": 0.18189024390243902, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0400796812749004, "calib/std_conf": 0.17991072001609987, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 360.734375, "completions/mean_terminated_length": 360.734375, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.050133333333333335, "grad_norm": 0.08796198666095734, "learning_rate": 4.25e-06, "loss": 0.09, "num_tokens": 10093013.0, "reward": 0.994473934173584, "reward_std": 0.28420084714889526, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6810207366943359, "rewards/format_reward_step_strict": 0.96875, "step": 47 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.772790537874386e-07, "aux_brier/mean_group_std": 0.05807757377194304, "aux_brier/mean_r": 0.9566157598240606, "aux_brier/n_active_tok": 167.625, "aux_brier/n_groups": 9.28125, "aux_brier/n_step_records": 41.90625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4553491572067424, "calib/avg_num_step_conf": 5.2421875, "calib/ece": 0.24847656250000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0257784657722738, "calib/mean_conf": 0.1291015625, "calib/mu_c": 0.11188235294117649, "calib/mu_w": 0.1376608187134503, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.022773437500000004, "calib/std_conf": 0.1452704019580678, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 289.5078125, "completions/mean_terminated_length": 290.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.0512, "grad_norm": 0.02429216168820858, "learning_rate": 4.222222222222223e-06, "loss": -0.0063, "num_tokens": 10270815.0, "reward": 1.0027838945388794, "reward_std": 0.26423102617263794, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6986355185508728, "rewards/format_reward_step_strict": 0.9921875, "step": 48 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.090116954540534e-07, "aux_brier/mean_group_std": 0.06381136482834818, "aux_brier/mean_r": 0.9547274216868711, "aux_brier/n_active_tok": 192.125, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 48.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5651565622918054, "calib/avg_num_step_conf": 6.015625, "calib/ece": 0.2537154150197628, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.03445436375749497, "calib/mean_conf": 0.13711462450592884, "calib/mu_c": 0.1586315789473684, "calib/mu_w": 0.12417721518987343, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007667984189723321, "calib/std_conf": 0.15052731654829482, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2251.0, "completions/max_terminated_length": 2251.0, "completions/mean_length": 330.765625, "completions/mean_terminated_length": 330.765625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.05226666666666667, "grad_norm": 0.10576039552688599, "learning_rate": 4.194444444444445e-06, "loss": 0.0535, "num_tokens": 10460027.0, "reward": 1.0348154306411743, "reward_std": 0.24864214658737183, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6939488649368286, "rewards/format_reward_step_strict": 0.98046875, "step": 49 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.1354192910872918e-07, "aux_brier/mean_group_std": 0.053774767738826584, "aux_brier/mean_r": 0.9551484619635056, "aux_brier/n_active_tok": 218.25, "aux_brier/n_groups": 16.40625, "aux_brier/n_step_records": 54.5625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5377824858757062, "calib/avg_num_step_conf": 6.8203125, "calib/ece": 0.40528000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0036312275295326407, "calib/mean_conf": 0.10768000000000001, "calib/mu_c": 0.10576271186440676, "calib/mu_w": 0.1093939393939394, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.020479999999999998, "calib/std_conf": 0.15038689304590344, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2664.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 406.77734375, "completions/mean_terminated_length": 406.77734375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.05333333333333334, "grad_norm": 0.2878992557525635, "learning_rate": 4.166666666666667e-06, "loss": 0.1021, "num_tokens": 10669522.0, "reward": 1.0921945571899414, "reward_std": 0.2993980646133423, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5797156095504761, "rewards/format_reward_step_strict": 0.97265625, "step": 50 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.598089822672625e-07, "aux_brier/mean_group_std": 0.051267485836996016, "aux_brier/mean_r": 0.964020473786176, "aux_brier/n_active_tok": 185.375, "aux_brier/n_groups": 11.09375, "aux_brier/n_step_records": 46.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5456861413043478, "calib/avg_num_step_conf": 5.79296875, "calib/ece": 0.2904365079365079, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.004682065217391285, "calib/mean_conf": 0.0928968253968254, "calib/mu_c": 0.09586956521739129, "calib/mu_w": 0.0911875, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009126984126984128, "calib/std_conf": 0.13054199162424127, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 324.91015625, "completions/mean_terminated_length": 327.468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.0544, "grad_norm": 0.20882609486579895, "learning_rate": 4.138888888888889e-06, "loss": -0.0007, "num_tokens": 10861995.0, "reward": 1.016768455505371, "reward_std": 0.27805376052856445, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6686363220214844, "rewards/format_reward_step_strict": 0.98046875, "step": 51 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.149199491822799e-07, "aux_brier/mean_group_std": 0.07735836416445827, "aux_brier/mean_r": 0.9498911083880336, "aux_brier/n_active_tok": 158.125, "aux_brier/n_groups": 8.8125, "aux_brier/n_step_records": 39.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5679389312977099, "calib/avg_num_step_conf": 4.9453125, "calib/ece": 0.421328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020738931297709942, "calib/mean_conf": 0.0721875, "calib/mu_c": 0.08280000000000001, "calib/mu_w": 0.06206106870229007, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0026171874999999997, "calib/std_conf": 0.1045409421889338, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 302.4609375, "completions/mean_terminated_length": 303.6470642089844, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.055466666666666664, "grad_norm": 0.09126332402229309, "learning_rate": 4.111111111111111e-06, "loss": -0.0247, "num_tokens": 11047377.0, "reward": 1.126570701599121, "reward_std": 0.3329008221626282, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5687824487686157, "rewards/format_reward_step_strict": 0.9921875, "step": 52 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.0911899479836293e-06, "aux_brier/mean_group_std": 0.07043554288810829, "aux_brier/mean_r": 0.9518235448680586, "aux_brier/n_active_tok": 189.25, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 47.3125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48905018145413587, "calib/avg_num_step_conf": 5.92578125, "calib/ece": 0.40984189723320164, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008803654110874637, "calib/mean_conf": 0.08430830039525691, "calib/mu_c": 0.08385245901639345, "calib/mu_w": 0.08473282442748091, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005968379446640316, "calib/std_conf": 0.1234133123383727, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 351.15234375, "completions/mean_terminated_length": 352.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.05653333333333333, "grad_norm": 0.18907392024993896, "learning_rate": 4.083333333333334e-06, "loss": -0.0111, "num_tokens": 11243096.0, "reward": 1.1107137203216553, "reward_std": 0.2957611083984375, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.567854642868042, "rewards/format_reward_step_strict": 0.984375, "step": 53 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.94792701869018e-07, "aux_brier/mean_group_std": 0.06574302311170417, "aux_brier/mean_r": 0.9539206846222074, "aux_brier/n_active_tok": 173.25, "aux_brier/n_groups": 10.40625, "aux_brier/n_step_records": 43.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5529012345679012, "calib/avg_num_step_conf": 5.4140625, "calib/ece": 0.46313725490196084, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02050925925925927, "calib/mean_conf": 0.06627450980392156, "calib/mu_c": 0.07592592592592594, "calib/mu_w": 0.05541666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.09302668359161163, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 310.359375, "completions/mean_terminated_length": 310.359375, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.0576, "grad_norm": 0.07939843833446503, "learning_rate": 4.055555555555556e-06, "loss": -0.0116, "num_tokens": 11428780.0, "reward": 1.156419038772583, "reward_std": 0.26543140411376953, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5319265723228455, "rewards/format_reward_step_strict": 0.9921875, "step": 54 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9486765046039523e-07, "aux_brier/mean_group_std": 0.049335625154782355, "aux_brier/mean_r": 0.9636705985066784, "aux_brier/n_active_tok": 181.75, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 45.4375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5019778746228629, "calib/avg_num_step_conf": 5.78125, "calib/ece": 0.33662698412698405, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002472008045591688, "calib/mean_conf": 0.045119047619047614, "calib/mu_c": 0.04357894736842105, "calib/mu_w": 0.04605095541401274, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0023809523809523807, "calib/std_conf": 0.06821116921327605, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1660.0, "completions/max_terminated_length": 1660.0, "completions/mean_length": 339.85546875, "completions/mean_terminated_length": 341.1882629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.058666666666666666, "grad_norm": 0.6068412661552429, "learning_rate": 4.027777777777779e-06, "loss": 0.0517, "num_tokens": 11623607.0, "reward": 1.0230414867401123, "reward_std": 0.25731363892555237, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.639041006565094, "rewards/format_reward_step_strict": 0.984375, "step": 55 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.337756533770218e-06, "aux_brier/mean_group_std": 0.059995697663266526, "aux_brier/mean_r": 0.9583495937558031, "aux_brier/n_active_tok": 213.5, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 53.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5153618421052631, "calib/avg_num_step_conf": 6.69140625, "calib/ece": 0.36038888888888887, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.010719999999999986, "calib/mean_conf": 0.046753968253968256, "calib/mu_c": 0.05321999999999999, "calib/mu_w": 0.0425, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005158730158730159, "calib/std_conf": 0.09279754260121541, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2089.0, "completions/max_terminated_length": 2089.0, "completions/mean_length": 401.078125, "completions/mean_terminated_length": 402.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.05973333333333333, "grad_norm": 0.4605119824409485, "learning_rate": 4.000000000000001e-06, "loss": 0.0343, "num_tokens": 11833123.0, "reward": 1.034104585647583, "reward_std": 0.27225345373153687, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6207932233810425, "rewards/format_reward_step_strict": 0.9765625, "step": 56 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5314033576796504e-06, "aux_brier/mean_group_std": 0.049375711026349946, "aux_brier/mean_r": 0.968344623238407, "aux_brier/n_active_tok": 188.875, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 47.21875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5041346153846153, "calib/avg_num_step_conf": 6.1484375, "calib/ece": 0.48672, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006641025641025637, "calib/mean_conf": 0.038880000000000005, "calib/mu_c": 0.0356923076923077, "calib/mu_w": 0.042333333333333334, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0028, "calib/std_conf": 0.07396989657962218, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2406.0, "completions/max_terminated_length": 2406.0, "completions/mean_length": 365.08203125, "completions/mean_terminated_length": 367.9566955566406, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.0608, "grad_norm": 1.6192066669464111, "learning_rate": 3.972222222222223e-06, "loss": 0.0277, "num_tokens": 12033376.0, "reward": 1.1186858415603638, "reward_std": 0.276128351688385, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.49818047881126404, "rewards/format_reward_step_strict": 0.97265625, "step": 57 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.279837858720768e-06, "aux_brier/mean_group_std": 0.03885044724419651, "aux_brier/mean_r": 0.9713866398942863, "aux_brier/n_active_tok": 206.75, "aux_brier/n_groups": 14.71875, "aux_brier/n_step_records": 51.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5091141564487575, "calib/avg_num_step_conf": 6.49609375, "calib/ece": 0.3799801587301588, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.008191921841190747, "calib/mean_conf": 0.031130952380952384, "calib/mu_c": 0.036039603960396044, "calib/mu_w": 0.027847682119205297, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005158730158730159, "calib/std_conf": 0.08191174786790666, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2650.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 410.66015625, "completions/mean_terminated_length": 412.2705993652344, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.06186666666666667, "grad_norm": 2.518641948699951, "learning_rate": 3.944444444444445e-06, "loss": 0.022, "num_tokens": 12244825.0, "reward": 1.0325732231140137, "reward_std": 0.307841032743454, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6068553924560547, "rewards/format_reward_step_strict": 0.97265625, "step": 58 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.8966270745046643e-06, "aux_brier/mean_group_std": 0.045400346765770226, "aux_brier/mean_r": 0.9650495315775414, "aux_brier/n_active_tok": 203.5, "aux_brier/n_groups": 13.09375, "aux_brier/n_step_records": 50.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5511583011583011, "calib/avg_num_step_conf": 6.359375, "calib/ece": 0.4228685258964143, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014387387387387386, "calib/mean_conf": 0.019362549800796814, "calib/mu_c": 0.02738738738738739, "calib/mu_w": 0.013000000000000003, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06028147812927871, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2176.0, "completions/max_terminated_length": 2176.0, "completions/mean_length": 383.21484375, "completions/mean_terminated_length": 386.2322692871094, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.06293333333333333, "grad_norm": 0.35520368814468384, "learning_rate": 3.916666666666667e-06, "loss": 0.0206, "num_tokens": 12449176.0, "reward": 1.0694079399108887, "reward_std": 0.27380746603012085, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5666944980621338, "rewards/format_reward_step_strict": 0.98046875, "step": 59 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.7207030169608117e-06, "aux_brier/mean_group_std": 0.05149790445141064, "aux_brier/mean_r": 0.9679355183235356, "aux_brier/n_active_tok": 182.125, "aux_brier/n_groups": 10.375, "aux_brier/n_step_records": 45.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4766156462585034, "calib/avg_num_step_conf": 5.69140625, "calib/ece": 0.40848605577689245, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016946624803767664, "calib/mean_conf": 0.023386454183266934, "calib/mu_c": 0.013461538461538462, "calib/mu_w": 0.030408163265306126, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.008764940239043825, "calib/std_conf": 0.07845168823060447, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2509.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 365.5078125, "completions/mean_terminated_length": 365.5078125, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.064, "grad_norm": 1.4717086553573608, "learning_rate": 3.88888888888889e-06, "loss": 0.0392, "num_tokens": 12651602.0, "reward": 1.0382107496261597, "reward_std": 0.25028032064437866, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5747183561325073, "rewards/format_reward_step_strict": 0.9765625, "step": 60 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.028031238334506e-06, "aux_brier/mean_group_std": 0.05582616345583301, "aux_brier/mean_r": 0.9608498901411248, "aux_brier/n_active_tok": 178.0, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 44.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4845317992969, "calib/avg_num_step_conf": 5.5703125, "calib/ece": 0.5752362204724408, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005680409076382231, "calib/mean_conf": 0.018858267716535433, "calib/mu_c": 0.01651006711409396, "calib/mu_w": 0.02219047619047619, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0037401574803149606, "calib/std_conf": 0.06028047625451905, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1030.0, "completions/max_terminated_length": 1030.0, "completions/mean_length": 311.8671875, "completions/mean_terminated_length": 313.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.06506666666666666, "grad_norm": 0.7951279878616333, "learning_rate": 3.861111111111112e-06, "loss": 0.0155, "num_tokens": 12835504.0, "reward": 1.1844792366027832, "reward_std": 0.1937452107667923, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4254167675971985, "rewards/format_reward_step_strict": 0.9921875, "step": 61 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.7647091771211265e-06, "aux_brier/mean_group_std": 0.05182514719913517, "aux_brier/mean_r": 0.9683787715187064, "aux_brier/n_active_tok": 216.5, "aux_brier/n_groups": 13.40625, "aux_brier/n_step_records": 54.125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.52943717190511, "calib/avg_num_step_conf": 6.796875, "calib/ece": 0.39504000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003007508804571731, "calib/mean_conf": 0.012960000000000001, "calib/mu_c": 0.014752475247524752, "calib/mu_w": 0.01174496644295302, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.002, "calib/std_conf": 0.03341913224486836, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 422.99609375, "completions/mean_terminated_length": 424.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.06613333333333334, "grad_norm": 0.06681760400533676, "learning_rate": 3.833333333333334e-06, "loss": 0.0156, "num_tokens": 13050871.0, "reward": 1.0221278667449951, "reward_std": 0.28524404764175415, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.5806984305381775, "rewards/format_reward_step_strict": 0.96484375, "step": 62 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.99389647912779e-06, "aux_brier/mean_group_std": 0.05313575891634672, "aux_brier/mean_r": 0.9664477263550149, "aux_brier/n_active_tok": 204.875, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 51.21875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48502690724912945, "calib/avg_num_step_conf": 6.5, "calib/ece": 0.44896825396825385, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008319088319088327, "calib/mean_conf": 0.018015873015873016, "calib/mu_c": 0.018461538461538463, "calib/mu_w": 0.01762962962962963, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0013492063492063493, "calib/std_conf": 0.061133731125956145, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2593.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 421.80859375, "completions/mean_terminated_length": 425.1299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.0672, "grad_norm": 0.08206135034561157, "learning_rate": 3.8055555555555556e-06, "loss": 0.022, "num_tokens": 13267494.0, "reward": 1.0754882097244263, "reward_std": 0.3273842930793762, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.528515636920929, "rewards/format_reward_step_strict": 0.97265625, "step": 63 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.847803295320418e-06, "aux_brier/mean_group_std": 0.04095292747955301, "aux_brier/mean_r": 0.9677550824873439, "aux_brier/n_active_tok": 215.0, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 53.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5263414634146342, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.4842338709677419, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003051056910569108, "calib/mean_conf": 0.014153225806451614, "calib/mu_c": 0.015691056910569108, "calib/mu_w": 0.01264, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012096774193548385, "calib/std_conf": 0.03741776884152827, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2435.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 412.7734375, "completions/mean_terminated_length": 412.7734375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.06826666666666667, "grad_norm": 0.052142687141895294, "learning_rate": 3.777777777777778e-06, "loss": 0.1098, "num_tokens": 13476940.0, "reward": 1.0854132175445557, "reward_std": 0.32813769578933716, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.4979027211666107, "rewards/format_reward_step_strict": 0.9609375, "step": 64 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.4041878437787148e-07, "aux_brier/mean_group_std": 0.05316383888763907, "aux_brier/mean_r": 0.9674401294258691, "aux_brier/n_active_tok": 175.875, "aux_brier/n_groups": 9.9375, "aux_brier/n_step_records": 43.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4761935483870968, "calib/avg_num_step_conf": 5.54296875, "calib/ece": 0.48520080321285136, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005476451612903227, "calib/mean_conf": 0.016807228915662654, "calib/mu_c": 0.01408, "calib/mu_w": 0.019556451612903227, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.03152653927513252, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1922.0, "completions/max_terminated_length": 1922.0, "completions/mean_length": 326.38671875, "completions/mean_terminated_length": 328.9566955566406, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.06933333333333333, "grad_norm": 0.11075668781995773, "learning_rate": 3.7500000000000005e-06, "loss": 0.0136, "num_tokens": 13665519.0, "reward": 1.100783348083496, "reward_std": 0.23851731419563293, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.4968835115432739, "rewards/format_reward_step_strict": 0.96875, "step": 65 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.95382818737977e-06, "aux_brier/mean_group_std": 0.04170639229763923, "aux_brier/mean_r": 0.9752149906375521, "aux_brier/n_active_tok": 217.625, "aux_brier/n_groups": 14.8125, "aux_brier/n_step_records": 54.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5184893267651888, "calib/avg_num_step_conf": 6.80078125, "calib/ece": 0.39896000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.018896551724137935, "calib/mean_conf": 0.02104, "calib/mu_c": 0.032, "calib/mu_w": 0.013103448275862068, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07690330552063417, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2430.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 429.39453125, "completions/mean_terminated_length": 429.39453125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.0704, "grad_norm": 0.019984150305390358, "learning_rate": 3.7222222222222225e-06, "loss": 0.0783, "num_tokens": 13881796.0, "reward": 1.0489559173583984, "reward_std": 0.24764403700828552, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5864484310150146, "rewards/format_reward_step_strict": 0.9765625, "step": 66 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.49393127735409e-07, "aux_brier/mean_group_std": 0.06469581027960165, "aux_brier/mean_r": 0.9589652238567143, "aux_brier/n_active_tok": 187.625, "aux_brier/n_groups": 11.78125, "aux_brier/n_step_records": 46.90625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.43666605177396545, "calib/avg_num_step_conf": 5.89453125, "calib/ece": 0.5223828125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006274980015987211, "calib/mean_conf": 0.0221484375, "calib/mu_c": 0.019280575539568346, "calib/mu_w": 0.025555555555555557, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00078125, "calib/std_conf": 0.05436128991579021, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 382.62890625, "completions/mean_terminated_length": 384.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.07146666666666666, "grad_norm": 0.11290935426950455, "learning_rate": 3.694444444444445e-06, "loss": -0.0363, "num_tokens": 14084757.0, "reward": 1.1530730724334717, "reward_std": 0.1719469279050827, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.46385470032691956, "rewards/format_reward_step_strict": 0.98828125, "step": 67 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.7287329072710484e-06, "aux_brier/mean_group_std": 0.03377295447585385, "aux_brier/mean_r": 0.9799369483727938, "aux_brier/n_active_tok": 186.125, "aux_brier/n_groups": 11.46875, "aux_brier/n_step_records": 46.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49965473948524797, "calib/avg_num_step_conf": 5.87109375, "calib/ece": 0.45184189723320156, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010290018832391692, "calib/mean_conf": 0.01614229249011858, "calib/mu_c": 0.015593220338983053, "calib/mu_w": 0.016622222222222222, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007905138339920949, "calib/std_conf": 0.03126141704297812, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1688.0, "completions/max_terminated_length": 1688.0, "completions/mean_length": 371.45703125, "completions/mean_terminated_length": 372.91375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.07253333333333334, "grad_norm": 0.18110527098178864, "learning_rate": 3.6666666666666666e-06, "loss": 0.0146, "num_tokens": 14283938.0, "reward": 1.0882487297058105, "reward_std": 0.241624116897583, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5404953956604004, "rewards/format_reward_step_strict": 0.984375, "step": 68 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.227063449240397e-06, "aux_brier/mean_group_std": 0.04779126202984317, "aux_brier/mean_r": 0.9652886564493792, "aux_brier/n_active_tok": 201.25, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 50.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4581994623303389, "calib/avg_num_step_conf": 6.2890625, "calib/ece": 0.38337301587301587, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004948527965379323, "calib/mean_conf": 0.017420634920634923, "calib/mu_c": 0.014455445544554454, "calib/mu_w": 0.019403973509933777, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.029935341213440846, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 452.0546875, "completions/mean_terminated_length": 452.0546875, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.0736, "grad_norm": 0.014755764044821262, "learning_rate": 3.638888888888889e-06, "loss": 0.0802, "num_tokens": 14504160.0, "reward": 1.030881643295288, "reward_std": 0.258872926235199, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.5922762155532837, "rewards/format_reward_step_strict": 0.9765625, "step": 69 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.408656835153124e-06, "aux_brier/mean_group_std": 0.04061929032654197, "aux_brier/mean_r": 0.9700416407596402, "aux_brier/n_active_tok": 212.625, "aux_brier/n_groups": 15.0, "aux_brier/n_step_records": 53.15625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6369284954811271, "calib/avg_num_step_conf": 6.64453125, "calib/ece": 0.4481300813008131, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018680223285486443, "calib/mean_conf": 0.025853658536585365, "calib/mu_c": 0.03587719298245614, "calib/mu_w": 0.017196969696969697, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005284552845528456, "calib/std_conf": 0.08678472411820899, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 447.1796875, "completions/mean_terminated_length": 448.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.07466666666666667, "grad_norm": 0.028785645961761475, "learning_rate": 3.6111111111111115e-06, "loss": 0.0586, "num_tokens": 14725630.0, "reward": 1.0579323768615723, "reward_std": 0.2267770767211914, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5364171266555786, "rewards/format_reward_step_strict": 0.95703125, "step": 70 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.029969959545854e-06, "aux_brier/mean_group_std": 0.048995203603129825, "aux_brier/mean_r": 0.9659952706389444, "aux_brier/n_active_tok": 227.625, "aux_brier/n_groups": 14.6875, "aux_brier/n_step_records": 56.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.534349173553719, "calib/avg_num_step_conf": 7.46875, "calib/ece": 0.4715662650602409, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00507941632231405, "calib/mean_conf": 0.01598393574297189, "calib/mu_c": 0.01859504132231405, "calib/mu_w": 0.013515625, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008032128514056224, "calib/std_conf": 0.04218832970889454, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1952.0, "completions/max_terminated_length": 1952.0, "completions/mean_length": 447.92578125, "completions/mean_terminated_length": 451.4527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.07573333333333333, "grad_norm": 0.346962034702301, "learning_rate": 3.5833333333333335e-06, "loss": 0.0354, "num_tokens": 14944707.0, "reward": 1.0917901992797852, "reward_std": 0.28287506103515625, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5155984163284302, "rewards/format_reward_step_strict": 0.97265625, "step": 71 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.3916517893619584e-06, "aux_brier/mean_group_std": 0.033061452835828356, "aux_brier/mean_r": 0.976291224687943, "aux_brier/n_active_tok": 225.875, "aux_brier/n_groups": 16.25, "aux_brier/n_step_records": 56.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5433564652702616, "calib/avg_num_step_conf": 7.05859375, "calib/ece": 0.43376984126984125, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021041573820589562, "calib/mean_conf": 0.016626984126984126, "calib/mu_c": 0.017787610619469027, "calib/mu_w": 0.01568345323741007, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000992063492063492, "calib/std_conf": 0.029869619891987435, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2809.0, "completions/max_terminated_length": 2809.0, "completions/mean_length": 427.97265625, "completions/mean_terminated_length": 427.97265625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.0768, "grad_norm": 0.05484065040946007, "learning_rate": 3.555555555555556e-06, "loss": 0.1103, "num_tokens": 15158676.0, "reward": 1.0729742050170898, "reward_std": 0.21148447692394257, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5575214624404907, "rewards/format_reward_step_strict": 0.984375, "step": 72 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.360505024725292e-06, "aux_brier/mean_group_std": 0.04531713227609749, "aux_brier/mean_r": 0.972327532595925, "aux_brier/n_active_tok": 198.75, "aux_brier/n_groups": 11.3125, "aux_brier/n_step_records": 49.6875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5806263073704934, "calib/avg_num_step_conf": 6.25, "calib/ece": 0.4771764705882352, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005575858250276854, "calib/mean_conf": 0.016941176470588237, "calib/mu_c": 0.019761904761904762, "calib/mu_w": 0.014186046511627907, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02482994062733726, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 391.87890625, "completions/mean_terminated_length": 393.41571044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.07786666666666667, "grad_norm": 0.04808742180466652, "learning_rate": 3.5277777777777784e-06, "loss": 0.0091, "num_tokens": 15366029.0, "reward": 1.1169428825378418, "reward_std": 0.25438421964645386, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5224593877792358, "rewards/format_reward_step_strict": 0.98828125, "step": 73 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.7314468178385454e-06, "aux_brier/mean_group_std": 0.03765971292555707, "aux_brier/mean_r": 0.9796891334346636, "aux_brier/n_active_tok": 221.125, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 55.28125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5207613606165293, "calib/avg_num_step_conf": 6.93359375, "calib/ece": 0.40467741935483875, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0060855700239170855, "calib/mean_conf": 0.02274193548387097, "calib/mu_c": 0.026226415094339622, "calib/mu_w": 0.020140845070422537, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.036935836031565285, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2389.0, "completions/max_terminated_length": 2389.0, "completions/mean_length": 453.0078125, "completions/mean_terminated_length": 454.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.07893333333333333, "grad_norm": 0.05028804391622543, "learning_rate": 3.5e-06, "loss": 0.08, "num_tokens": 15585927.0, "reward": 1.0399447679519653, "reward_std": 0.2993197441101074, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.573841392993927, "rewards/format_reward_step_strict": 0.96484375, "step": 74 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.2505642699187653e-06, "aux_brier/mean_group_std": 0.06429856926379476, "aux_brier/mean_r": 0.9510935457446775, "aux_brier/n_active_tok": 198.375, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 49.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.513403425706726, "calib/avg_num_step_conf": 6.19921875, "calib/ece": 0.6353359683794466, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00225316808243977, "calib/mean_conf": 0.02474308300395257, "calib/mu_c": 0.025508982035928142, "calib/mu_w": 0.023255813953488372, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.029762452165010417, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2159.0, "completions/max_terminated_length": 2159.0, "completions/mean_length": 395.53515625, "completions/mean_terminated_length": 395.53515625, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.08, "grad_norm": 0.007172565907239914, "learning_rate": 3.4722222222222224e-06, "loss": 0.0416, "num_tokens": 15791936.0, "reward": 1.2384189367294312, "reward_std": 0.2703310549259186, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.3677383065223694, "rewards/format_reward_step_strict": 0.98828125, "step": 75 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.346894645987589e-07, "aux_brier/mean_group_std": 0.05394622553427382, "aux_brier/mean_r": 0.9652903386673772, "aux_brier/n_active_tok": 205.625, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 51.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47307063851181497, "calib/avg_num_step_conf": 6.42578125, "calib/ece": 0.5091699604743082, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0025377073906485623, "calib/mean_conf": 0.028379446640316205, "calib/mu_c": 0.027205882352941177, "calib/mu_w": 0.02974358974358974, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.023543381296148785, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 412.6171875, "completions/mean_terminated_length": 412.6171875, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.08106666666666666, "grad_norm": 0.006502797827124596, "learning_rate": 3.444444444444445e-06, "loss": 0.0562, "num_tokens": 16000622.0, "reward": 1.1465389728546143, "reward_std": 0.2662979066371918, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.4845937490463257, "rewards/format_reward_step_strict": 0.98828125, "step": 76 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.569284396319871e-06, "aux_brier/mean_group_std": 0.0676945337044517, "aux_brier/mean_r": 0.9563684466021387, "aux_brier/n_active_tok": 205.75, "aux_brier/n_groups": 14.0625, "aux_brier/n_step_records": 51.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5506756756756757, "calib/avg_num_step_conf": 6.4296875, "calib/ece": 0.5623199999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032684154742978304, "calib/mean_conf": 0.029679999999999998, "calib/mu_c": 0.031013513513513512, "calib/mu_w": 0.027745098039215682, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.025375137438051445, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 425.17578125, "completions/mean_terminated_length": 426.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.08213333333333334, "grad_norm": 0.01168504822999239, "learning_rate": 3.416666666666667e-06, "loss": 0.0847, "num_tokens": 16214131.0, "reward": 1.172635555267334, "reward_std": 0.28494516015052795, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.4327300786972046, "rewards/format_reward_step_strict": 0.97265625, "step": 77 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.763075152098729e-07, "aux_brier/mean_group_std": 0.05472211652629763, "aux_brier/mean_r": 0.9655651246092493, "aux_brier/n_active_tok": 220.125, "aux_brier/n_groups": 14.40625, "aux_brier/n_step_records": 55.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5013574660633484, "calib/avg_num_step_conf": 6.91796875, "calib/ece": 0.4926746987951807, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007744214608920497, "calib/mean_conf": 0.03857028112449799, "calib/mu_c": 0.034869230769230763, "calib/mu_w": 0.04261344537815126, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.004578313253012048, "calib/std_conf": 0.054301738420637125, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 497.328125, "completions/mean_terminated_length": 497.328125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.0832, "grad_norm": 0.006679881364107132, "learning_rate": 3.3888888888888893e-06, "loss": 0.098, "num_tokens": 16449471.0, "reward": 1.1172987222671509, "reward_std": 0.3159688115119934, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.4926322102546692, "rewards/format_reward_step_strict": 0.95703125, "step": 78 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.383033536478845e-07, "aux_brier/mean_group_std": 0.05742701776696072, "aux_brier/mean_r": 0.9586039216655239, "aux_brier/n_active_tok": 224.5, "aux_brier/n_groups": 14.1875, "aux_brier/n_step_records": 56.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5595282089184528, "calib/avg_num_step_conf": 7.01953125, "calib/ece": 0.48423529411764704, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.0005728011825572865, "calib/mean_conf": 0.04078431372549019, "calib/mu_c": 0.04106060606060606, "calib/mu_w": 0.040487804878048775, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003686274509803921, "calib/std_conf": 0.06920396501320307, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2365.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 465.0546875, "completions/mean_terminated_length": 465.0546875, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.08426666666666667, "grad_norm": 0.006277347914874554, "learning_rate": 3.3611111111111117e-06, "loss": 0.0475, "num_tokens": 16674901.0, "reward": 1.1378858089447021, "reward_std": 0.2390265166759491, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5124804973602295, "rewards/format_reward_step_strict": 0.98828125, "step": 79 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.460258663630979e-06, "aux_brier/mean_group_std": 0.06699746246123675, "aux_brier/mean_r": 0.9554511037314575, "aux_brier/n_active_tok": 219.75, "aux_brier/n_groups": 13.09375, "aux_brier/n_step_records": 54.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5270473574045003, "calib/avg_num_step_conf": 6.9765625, "calib/ece": 0.5416334661354582, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019453165881737344, "calib/mean_conf": 0.04402390438247012, "calib/mu_c": 0.04482993197278912, "calib/mu_w": 0.04288461538461538, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.029527207830841, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 441.390625, "completions/mean_terminated_length": 444.86614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.08533333333333333, "grad_norm": 0.12150449305772781, "learning_rate": 3.3333333333333333e-06, "loss": -0.005, "num_tokens": 16890057.0, "reward": 1.1750924587249756, "reward_std": 0.2941782474517822, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.4503699243068695, "rewards/format_reward_step_strict": 0.96875, "step": 80 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.3112985259032826e-06, "aux_brier/mean_group_std": 0.05724071403940858, "aux_brier/mean_r": 0.9653533296743093, "aux_brier/n_active_tok": 222.875, "aux_brier/n_groups": 15.84375, "aux_brier/n_step_records": 55.71875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4776960784313725, "calib/avg_num_step_conf": 7.1328125, "calib/ece": 0.5206970954356845, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00030238095238095675, "calib/mean_conf": 0.04361825726141079, "calib/mu_c": 0.043750000000000004, "calib/mu_w": 0.04344761904761905, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.0361915649871068, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 470.4609375, "completions/mean_terminated_length": 474.16534423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.0864, "grad_norm": 0.022036930546164513, "learning_rate": 3.3055555555555558e-06, "loss": 0.0753, "num_tokens": 17116743.0, "reward": 1.110398769378662, "reward_std": 0.315701425075531, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4494074881076813, "rewards/format_reward_step_strict": 0.92578125, "step": 81 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.6606594932522256e-08, "aux_brier/mean_group_std": 0.057497007152077374, "aux_brier/mean_r": 0.963235195565649, "aux_brier/n_active_tok": 194.625, "aux_brier/n_groups": 11.71875, "aux_brier/n_step_records": 48.65625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4693302387267905, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.5404016064257028, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0028474801061007943, "calib/mean_conf": 0.04353413654618474, "calib/mu_c": 0.0423448275862069, "calib/mu_w": 0.04519230769230769, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0008032128514056224, "calib/std_conf": 0.025085393103278526, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2273.0, "completions/max_terminated_length": 2273.0, "completions/mean_length": 408.19921875, "completions/mean_terminated_length": 413.03955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.08746666666666666, "grad_norm": 0.1904134601354599, "learning_rate": 3.277777777777778e-06, "loss": 0.0472, "num_tokens": 17326794.0, "reward": 1.151876449584961, "reward_std": 0.29784828424453735, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.4356308579444885, "rewards/format_reward_step_strict": 0.953125, "step": 82 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.218257767805376e-06, "aux_brier/mean_group_std": 0.06747656495244791, "aux_brier/mean_r": 0.9456673557669775, "aux_brier/n_active_tok": 245.5, "aux_brier/n_groups": 19.6875, "aux_brier/n_step_records": 61.375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5392962072649573, "calib/avg_num_step_conf": 7.82421875, "calib/ece": 0.48138775510204085, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004119925213675324, "calib/mean_conf": 0.042693877551020415, "calib/mu_c": 0.04289062500000001, "calib/mu_w": 0.042478632478632476, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.000816326530612245, "calib/std_conf": 0.027878895596104156, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2879.0, "completions/max_terminated_length": 2879.0, "completions/mean_length": 536.4609375, "completions/mean_terminated_length": 542.8221435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.08853333333333334, "grad_norm": 0.13237819075584412, "learning_rate": 3.2500000000000002e-06, "loss": 0.0673, "num_tokens": 17571392.0, "reward": 1.0950324535369873, "reward_std": 0.2291625440120697, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.4895046651363373, "rewards/format_reward_step_strict": 0.9375, "step": 83 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.204469538666601e-07, "aux_brier/mean_group_std": 0.0631882685984794, "aux_brier/mean_r": 0.9567116047566644, "aux_brier/n_active_tok": 198.375, "aux_brier/n_groups": 12.09375, "aux_brier/n_step_records": 49.59375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5419250645994833, "calib/avg_num_step_conf": 6.34375, "calib/ece": 0.4406024096385542, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035542635658914815, "calib/mean_conf": 0.04132530120481928, "calib/mu_c": 0.04316666666666667, "calib/mu_w": 0.03961240310077519, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02680156521551764, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2960.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 428.19921875, "completions/mean_terminated_length": 431.57086181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.0896, "grad_norm": 0.0071330503560602665, "learning_rate": 3.2222222222222227e-06, "loss": 0.0068, "num_tokens": 17786931.0, "reward": 1.0847227573394775, "reward_std": 0.23574988543987274, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5342034697532654, "rewards/format_reward_step_strict": 0.95703125, "step": 84 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.4682940519559224e-08, "aux_brier/mean_group_std": 0.07396974394006378, "aux_brier/mean_r": 0.9495267329428589, "aux_brier/n_active_tok": 202.75, "aux_brier/n_groups": 12.46875, "aux_brier/n_step_records": 50.6875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5214947089947091, "calib/avg_num_step_conf": 6.3359375, "calib/ece": 0.4397967479674797, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009603174603174669, "calib/mean_conf": 0.04800813008130081, "calib/mu_c": 0.0485, "calib/mu_w": 0.047539682539682535, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.028720972177920225, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 485.11328125, "completions/mean_terminated_length": 488.9330749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.09066666666666667, "grad_norm": 0.011264825239777565, "learning_rate": 3.1944444444444443e-06, "loss": 0.0655, "num_tokens": 18018944.0, "reward": 1.0750786066055298, "reward_std": 0.27728450298309326, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5268769264221191, "rewards/format_reward_step_strict": 0.94921875, "step": 85 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.8439633884061455e-06, "aux_brier/mean_group_std": 0.061103717383855255, "aux_brier/mean_r": 0.960463549216192, "aux_brier/n_active_tok": 206.625, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 51.65625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5642167314687068, "calib/avg_num_step_conf": 6.484375, "calib/ece": 0.42150197628458497, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011526401605418402, "calib/mean_conf": 0.05501976284584981, "calib/mu_c": 0.05563025210084034, "calib/mu_w": 0.0544776119402985, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0030830039525691698, "calib/std_conf": 0.0402818578013471, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2693.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 455.29296875, "completions/mean_terminated_length": 457.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.09173333333333333, "grad_norm": 0.04765058308839798, "learning_rate": 3.1666666666666667e-06, "loss": 0.01, "num_tokens": 18241011.0, "reward": 1.0938220024108887, "reward_std": 0.24476972222328186, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5627878904342651, "rewards/format_reward_step_strict": 0.9765625, "step": 86 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4273639888928713e-06, "aux_brier/mean_group_std": 0.07217300149636746, "aux_brier/mean_r": 0.9548417209835198, "aux_brier/n_active_tok": 210.0, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 52.5, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5291983657368273, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.5777327935222673, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.007298534798534803, "calib/mean_conf": 0.05570850202429149, "calib/mu_c": 0.058397435897435894, "calib/mu_w": 0.05109890109890109, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000931174089068826, "calib/std_conf": 0.06676598239708885, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2618.0, "completions/max_terminated_length": 2618.0, "completions/mean_length": 445.9609375, "completions/mean_terminated_length": 453.0397033691406, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0928, "grad_norm": 0.07679977267980576, "learning_rate": 3.138888888888889e-06, "loss": 0.0397, "num_tokens": 18460673.0, "reward": 1.1927268505096436, "reward_std": 0.2760070264339447, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.41934531927108765, "rewards/format_reward_step_strict": 0.95703125, "step": 87 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.4337857021938802e-06, "aux_brier/mean_group_std": 0.06245186857284721, "aux_brier/mean_r": 0.9612910237050545, "aux_brier/n_active_tok": 230.625, "aux_brier/n_groups": 14.65625, "aux_brier/n_step_records": 57.65625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.576890945311998, "calib/avg_num_step_conf": 7.21484375, "calib/ece": 0.47091999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00030139451192083555, "calib/mean_conf": 0.06828000000000001, "calib/mu_c": 0.06842105263157895, "calib/mu_w": 0.06811965811965812, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0036000000000000003, "calib/std_conf": 0.06230763677110535, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 505.69140625, "completions/mean_terminated_length": 505.69140625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.09386666666666667, "grad_norm": 0.006025264505296946, "learning_rate": 3.1111111111111116e-06, "loss": 0.0901, "num_tokens": 18699978.0, "reward": 1.1336658000946045, "reward_std": 0.24178946018218994, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5190386772155762, "rewards/format_reward_step_strict": 0.96875, "step": 88 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.287836486733699e-06, "aux_brier/mean_group_std": 0.062280394224279924, "aux_brier/mean_r": 0.9556622252107899, "aux_brier/n_active_tok": 206.25, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 51.5625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4993795715778474, "calib/avg_num_step_conf": 6.4609375, "calib/ece": 0.47518145161290326, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003880616509926868, "calib/mean_conf": 0.059012096774193556, "calib/mu_c": 0.05719696969696969, "calib/mu_w": 0.06107758620689656, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.0009677419354838709, "calib/std_conf": 0.03091239988372433, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2629.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 488.9765625, "completions/mean_terminated_length": 490.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.09493333333333333, "grad_norm": 0.006578805856406689, "learning_rate": 3.0833333333333336e-06, "loss": 0.0908, "num_tokens": 18934044.0, "reward": 1.1073076725006104, "reward_std": 0.2533990442752838, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.4917307496070862, "rewards/format_reward_step_strict": 0.9375, "step": 89 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.406338775022391e-07, "aux_brier/mean_group_std": 0.08968575593167973, "aux_brier/mean_r": 0.9227656757281388, "aux_brier/n_active_tok": 233.25, "aux_brier/n_groups": 15.5, "aux_brier/n_step_records": 58.3125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48957646127457455, "calib/avg_num_step_conf": 7.2890625, "calib/ece": 0.5078313253012049, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.840216387386024e-05, "calib/mean_conf": 0.06646586345381526, "calib/mu_c": 0.0665034965034965, "calib/mu_w": 0.06641509433962264, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03282483386447449, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2614.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 481.171875, "completions/mean_terminated_length": 481.171875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.096, "grad_norm": 0.006786589976400137, "learning_rate": 3.055555555555556e-06, "loss": 0.0124, "num_tokens": 19160544.0, "reward": 1.165675401687622, "reward_std": 0.2502981722354889, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.48301443457603455, "rewards/format_reward_step_strict": 0.97265625, "step": 90 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.2981149173363349e-06, "aux_brier/mean_group_std": 0.05019438075960755, "aux_brier/mean_r": 0.970150317796923, "aux_brier/n_active_tok": 221.0, "aux_brier/n_groups": 14.6875, "aux_brier/n_step_records": 55.25, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48671215074723845, "calib/avg_num_step_conf": 6.90625, "calib/ece": 0.472289156626506, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -6.042884990255237e-05, "calib/mean_conf": 0.06987951807228916, "calib/mu_c": 0.06985185185185185, "calib/mu_w": 0.0699122807017544, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.027249876425950875, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 488.1796875, "completions/mean_terminated_length": 490.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.09706666666666666, "grad_norm": 0.009234710596501827, "learning_rate": 3.0277777777777776e-06, "loss": 0.0621, "num_tokens": 19393230.0, "reward": 1.130971908569336, "reward_std": 0.26993629336357117, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5004504323005676, "rewards/format_reward_step_strict": 0.95703125, "step": 91 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.5199346654615198e-06, "aux_brier/mean_group_std": 0.0317642007873023, "aux_brier/mean_r": 0.9793554388894053, "aux_brier/n_active_tok": 214.0, "aux_brier/n_groups": 13.71875, "aux_brier/n_step_records": 53.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6035353535353535, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.5366798418972333, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013600288600288613, "calib/mean_conf": 0.07201581027667983, "calib/mu_c": 0.07733766233766234, "calib/mu_w": 0.06373737373737373, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.04199905592571995, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1933.0, "completions/max_terminated_length": 1933.0, "completions/mean_length": 435.40234375, "completions/mean_terminated_length": 435.40234375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09813333333333334, "grad_norm": 0.007359606679528952, "learning_rate": 3e-06, "loss": 0.009, "num_tokens": 19611413.0, "reward": 1.2064805030822754, "reward_std": 0.22201576828956604, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.46654728055000305, "rewards/format_reward_step_strict": 0.96875, "step": 92 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.9570681371039456e-06, "aux_brier/mean_group_std": 0.04964086610747109, "aux_brier/mean_r": 0.9658705880275835, "aux_brier/n_active_tok": 250.5, "aux_brier/n_groups": 16.71875, "aux_brier/n_step_records": 62.625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.40741106719367587, "calib/avg_num_step_conf": 7.8359375, "calib/ece": 0.4591902834008097, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.020566534914361, "calib/mean_conf": 0.08987854251012146, "calib/mu_c": 0.08030303030303032, "calib/mu_w": 0.10086956521739132, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0073279352226720655, "calib/std_conf": 0.0665243663701703, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 524.30078125, "completions/mean_terminated_length": 524.30078125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.0992, "grad_norm": 0.005457157269120216, "learning_rate": 2.9722222222222225e-06, "loss": 0.04, "num_tokens": 19851410.0, "reward": 1.124093770980835, "reward_std": 0.25590553879737854, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5198125243186951, "rewards/format_reward_step_strict": 0.95703125, "step": 93 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2300667046327707e-06, "aux_brier/mean_group_std": 0.06045114795667379, "aux_brier/mean_r": 0.9547563204393912, "aux_brier/n_active_tok": 214.75, "aux_brier/n_groups": 14.71875, "aux_brier/n_step_records": 53.6875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5974820143884892, "calib/avg_num_step_conf": 6.7109375, "calib/ece": 0.48429718875502015, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009275343361674293, "calib/mean_conf": 0.07899598393574297, "calib/mu_c": 0.08309352517985612, "calib/mu_w": 0.07381818181818182, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0025301204819277107, "calib/std_conf": 0.04541857960603195, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 497.46484375, "completions/mean_terminated_length": 497.46484375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.10026666666666667, "grad_norm": 0.006252513267099857, "learning_rate": 2.944444444444445e-06, "loss": 0.0681, "num_tokens": 20087441.0, "reward": 1.1524341106414795, "reward_std": 0.21631355583667755, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5081738233566284, "rewards/format_reward_step_strict": 0.96484375, "step": 94 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.0933209314853993e-07, "aux_brier/mean_group_std": 0.07138223201401671, "aux_brier/mean_r": 0.9512899567899513, "aux_brier/n_active_tok": 218.75, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 54.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5237669645812644, "calib/avg_num_step_conf": 6.8359375, "calib/ece": 0.5448425196850394, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003000993048659417, "calib/mean_conf": 0.08114173228346458, "calib/mu_c": 0.08226415094339624, "calib/mu_w": 0.07926315789473683, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.036064732791605464, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 467.0234375, "completions/mean_terminated_length": 467.0234375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.10133333333333333, "grad_norm": 0.00672187888994813, "learning_rate": 2.916666666666667e-06, "loss": 0.0163, "num_tokens": 20313127.0, "reward": 1.228598952293396, "reward_std": 0.22712072730064392, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.46127110719680786, "rewards/format_reward_step_strict": 0.984375, "step": 95 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.233655969074988e-06, "aux_brier/mean_group_std": 0.06829223796434122, "aux_brier/mean_r": 0.947683284225488, "aux_brier/n_active_tok": 212.625, "aux_brier/n_groups": 13.375, "aux_brier/n_step_records": 53.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48457300275482096, "calib/avg_num_step_conf": 6.64453125, "calib/ece": 0.5717391304347826, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005257575757575753, "calib/mean_conf": 0.08043478260869566, "calib/mu_c": 0.07860606060606061, "calib/mu_w": 0.08386363636363636, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.034242345667469226, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 431.0234375, "completions/mean_terminated_length": 431.0234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.1024, "grad_norm": 0.006833321414887905, "learning_rate": 2.888888888888889e-06, "loss": 0.0386, "num_tokens": 20529285.0, "reward": 1.245131492614746, "reward_std": 0.22330452501773834, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.43365079164505005, "rewards/format_reward_step_strict": 0.984375, "step": 96 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4035530246925276e-06, "aux_brier/mean_group_std": 0.034228858761962536, "aux_brier/mean_r": 0.9738376620580753, "aux_brier/n_active_tok": 231.5, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 57.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5391290527654163, "calib/avg_num_step_conf": 7.23828125, "calib/ece": 0.4316733067729083, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016357279084551896, "calib/mean_conf": 0.08944223107569722, "calib/mu_c": 0.09023076923076924, "calib/mu_w": 0.08859504132231405, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0015936254980079682, "calib/std_conf": 0.04328732469770397, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 476.63671875, "completions/mean_terminated_length": 476.63671875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.10346666666666667, "grad_norm": 0.006699815392494202, "learning_rate": 2.861111111111111e-06, "loss": 0.0761, "num_tokens": 20756376.0, "reward": 1.1296970844268799, "reward_std": 0.26899388432502747, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5500386953353882, "rewards/format_reward_step_strict": 0.96875, "step": 97 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.5524656547279996e-07, "aux_brier/mean_group_std": 0.08662462380554938, "aux_brier/mean_r": 0.936917259113745, "aux_brier/n_active_tok": 211.875, "aux_brier/n_groups": 12.6875, "aux_brier/n_step_records": 52.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5064566115702479, "calib/avg_num_step_conf": 6.62109375, "calib/ece": 0.43232931726907625, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014811466942148688, "calib/mean_conf": 0.08775100401606427, "calib/mu_c": 0.08703125000000002, "calib/mu_w": 0.08851239669421489, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0030120481927710845, "calib/std_conf": 0.03406470115021999, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 479.90625, "completions/mean_terminated_length": 479.90625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.10453333333333334, "grad_norm": 0.006359133403748274, "learning_rate": 2.8333333333333335e-06, "loss": 0.0563, "num_tokens": 20985416.0, "reward": 1.1240954399108887, "reward_std": 0.2709805369377136, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5510691404342651, "rewards/format_reward_step_strict": 0.97265625, "step": 98 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.861180691844268e-07, "aux_brier/mean_group_std": 0.05330940022385636, "aux_brier/mean_r": 0.967148880055956, "aux_brier/n_active_tok": 247.5, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 61.875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5039298245614036, "calib/avg_num_step_conf": 7.8828125, "calib/ece": 0.2914285714285714, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027333333333333515, "calib/mean_conf": 0.0963265306122449, "calib/mu_c": 0.09800000000000002, "calib/mu_w": 0.09526666666666667, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.040329959368405346, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 539.828125, "completions/mean_terminated_length": 548.3968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1056, "grad_norm": 0.03724941611289978, "learning_rate": 2.805555555555556e-06, "loss": 0.0629, "num_tokens": 21229412.0, "reward": 1.0067994594573975, "reward_std": 0.3061428666114807, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6443851590156555, "rewards/format_reward_step_strict": 0.94921875, "step": 99 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1245468185410177e-06, "aux_brier/mean_group_std": 0.07750098171818107, "aux_brier/mean_r": 0.9422343609838857, "aux_brier/n_active_tok": 244.75, "aux_brier/n_groups": 18.90625, "aux_brier/n_step_records": 61.1875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.557468220338983, "calib/avg_num_step_conf": 7.6484375, "calib/ece": 0.39268292682926825, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005009269067796598, "calib/mean_conf": 0.08951219512195123, "calib/mu_c": 0.0921186440677966, "calib/mu_w": 0.087109375, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012601626016260164, "calib/std_conf": 0.034871509990285446, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 536.05859375, "completions/mean_terminated_length": 536.05859375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.10666666666666667, "grad_norm": 0.006149076856672764, "learning_rate": 2.7777777777777783e-06, "loss": 0.2026, "num_tokens": 21474051.0, "reward": 1.0854196548461914, "reward_std": 0.27186161279678345, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5760538578033447, "rewards/format_reward_step_strict": 0.9609375, "step": 100 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.7394725093655694e-06, "aux_brier/mean_group_std": 0.04651070172327584, "aux_brier/mean_r": 0.9688452527490977, "aux_brier/n_active_tok": 271.375, "aux_brier/n_groups": 18.25, "aux_brier/n_step_records": 67.84375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5565034122842231, "calib/avg_num_step_conf": 8.5546875, "calib/ece": 0.3412550607287449, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.1410410812258696e-05, "calib/mean_conf": 0.10093117408906883, "calib/mu_c": 0.1009433962264151, "calib/mu_w": 0.10092198581560284, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006518218623481782, "calib/std_conf": 0.048486645348632076, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 529.94921875, "completions/mean_terminated_length": 532.0274658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.10773333333333333, "grad_norm": 0.015220129862427711, "learning_rate": 2.7500000000000004e-06, "loss": 0.1063, "num_tokens": 21716710.0, "reward": 1.0520538091659546, "reward_std": 0.25686049461364746, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6222777366638184, "rewards/format_reward_step_strict": 0.96484375, "step": 101 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.7109194594411292e-07, "aux_brier/mean_group_std": 0.057648837572275936, "aux_brier/mean_r": 0.9555693066286415, "aux_brier/n_active_tok": 236.25, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 59.0625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.47846601390662685, "calib/avg_num_step_conf": 7.6953125, "calib/ece": 0.5576706827309238, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0022137079608344212, "calib/mean_conf": 0.0953413654618474, "calib/mu_c": 0.0945679012345679, "calib/mu_w": 0.09678160919540232, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012048192771084336, "calib/std_conf": 0.038495604711073925, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2461.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 433.44140625, "completions/mean_terminated_length": 438.5810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.1088, "grad_norm": 0.16762180626392365, "learning_rate": 2.7222222222222224e-06, "loss": -0.0229, "num_tokens": 21934367.0, "reward": 1.2294995784759521, "reward_std": 0.21669785678386688, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.44924843311309814, "rewards/format_reward_step_strict": 0.96875, "step": 102 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.797633503748287e-07, "aux_brier/mean_group_std": 0.05695505996071269, "aux_brier/mean_r": 0.961754498336844, "aux_brier/n_active_tok": 238.75, "aux_brier/n_groups": 14.90625, "aux_brier/n_step_records": 59.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48091553836234685, "calib/avg_num_step_conf": 7.4609375, "calib/ece": 0.4811952191235059, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011633784655061277, "calib/mean_conf": 0.09928286852589642, "calib/mu_c": 0.09418439716312055, "calib/mu_w": 0.10581818181818183, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009362549800796814, "calib/std_conf": 0.04089004380785392, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2426.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 501.71484375, "completions/mean_terminated_length": 505.66534423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.10986666666666667, "grad_norm": 0.01944098249077797, "learning_rate": 2.6944444444444444e-06, "loss": 0.0345, "num_tokens": 22167358.0, "reward": 1.1715490818023682, "reward_std": 0.21027979254722595, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5221335887908936, "rewards/format_reward_step_strict": 0.98046875, "step": 103 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.3577439715594117e-06, "aux_brier/mean_group_std": 0.047835558453891036, "aux_brier/mean_r": 0.9681153416870975, "aux_brier/n_active_tok": 240.5, "aux_brier/n_groups": 14.0, "aux_brier/n_step_records": 60.125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.618741196055833, "calib/avg_num_step_conf": 7.62890625, "calib/ece": 0.35426294820717125, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013605455243949321, "calib/mean_conf": 0.10231075697211155, "calib/mu_c": 0.10973684210526319, "calib/mu_w": 0.09613138686131387, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001195219123505976, "calib/std_conf": 0.041479595183155624, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 482.046875, "completions/mean_terminated_length": 483.9372863769531, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.11093333333333333, "grad_norm": 0.05234473571181297, "learning_rate": 2.666666666666667e-06, "loss": 0.0765, "num_tokens": 22397442.0, "reward": 1.0830793380737305, "reward_std": 0.25311779975891113, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.613567590713501, "rewards/format_reward_step_strict": 0.96875, "step": 104 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8971290646763173e-06, "aux_brier/mean_group_std": 0.04680792486013509, "aux_brier/mean_r": 0.9673522841144343, "aux_brier/n_active_tok": 258.0, "aux_brier/n_groups": 17.8125, "aux_brier/n_step_records": 64.5, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4296706989247312, "calib/avg_num_step_conf": 8.19921875, "calib/ece": 0.392172131147541, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009276881720430105, "calib/mean_conf": 0.09963114754098362, "calib/mu_c": 0.09491666666666669, "calib/mu_w": 0.1041935483870968, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0401465962911567, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 526.13671875, "completions/mean_terminated_length": 528.2000122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.112, "grad_norm": 0.008446063846349716, "learning_rate": 2.6388888888888893e-06, "loss": 0.1038, "num_tokens": 22637893.0, "reward": 1.0859030485153198, "reward_std": 0.3250919580459595, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5623620748519897, "rewards/format_reward_step_strict": 0.953125, "step": 105 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.0323029602531406e-06, "aux_brier/mean_group_std": 0.0635028179003855, "aux_brier/mean_r": 0.9553198005022946, "aux_brier/n_active_tok": 238.375, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 59.59375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49169165417291355, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.359251968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0051586706646676445, "calib/mean_conf": 0.10271653543307087, "calib/mu_c": 0.0999137931034483, "calib/mu_w": 0.10507246376811595, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002637795275590551, "calib/std_conf": 0.041289528450326476, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 467.91796875, "completions/mean_terminated_length": 467.91796875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.11306666666666666, "grad_norm": 0.007161939051002264, "learning_rate": 2.6111111111111113e-06, "loss": 0.0688, "num_tokens": 22862264.0, "reward": 1.103581190109253, "reward_std": 0.21279966831207275, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6174496412277222, "rewards/format_reward_step_strict": 0.9921875, "step": 106 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.6140966701569965e-06, "aux_brier/mean_group_std": 0.05417005835351656, "aux_brier/mean_r": 0.9619935437258145, "aux_brier/n_active_tok": 244.375, "aux_brier/n_groups": 14.0, "aux_brier/n_step_records": 61.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5812308868501529, "calib/avg_num_step_conf": 7.63671875, "calib/ece": 0.4644268774703557, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010589959225280304, "calib/mean_conf": 0.10474308300395258, "calib/mu_c": 0.10930555555555554, "calib/mu_w": 0.09871559633027524, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.035302660907854076, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 463.1796875, "completions/mean_terminated_length": 464.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.11413333333333334, "grad_norm": 0.00802396610379219, "learning_rate": 2.5833333333333337e-06, "loss": 0.0051, "num_tokens": 23085454.0, "reward": 1.1886709928512573, "reward_std": 0.2658335566520691, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5359336137771606, "rewards/format_reward_step_strict": 0.984375, "step": 107 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.3157730937039e-06, "aux_brier/mean_group_std": 0.07684176567918097, "aux_brier/mean_r": 0.9511278745127235, "aux_brier/n_active_tok": 269.5, "aux_brier/n_groups": 17.90625, "aux_brier/n_step_records": 67.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5467066295582673, "calib/avg_num_step_conf": 8.4296875, "calib/ece": 0.5717637795275591, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003560408192392797, "calib/mean_conf": 0.10933858267716537, "calib/mu_c": 0.11047398843930635, "calib/mu_w": 0.10691358024691355, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03944261664038043, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 507.61328125, "completions/mean_terminated_length": 507.61328125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.1152, "grad_norm": 0.005916001740843058, "learning_rate": 2.5555555555555557e-06, "loss": -0.0125, "num_tokens": 23318635.0, "reward": 1.2800846099853516, "reward_std": 0.25666752457618713, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.44846364855766296, "rewards/format_reward_step_strict": 0.984375, "step": 108 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.2602374175885167e-06, "aux_brier/mean_group_std": 0.03728945680070545, "aux_brier/mean_r": 0.9738470179694128, "aux_brier/n_active_tok": 266.625, "aux_brier/n_groups": 18.40625, "aux_brier/n_step_records": 66.65625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4810134620269241, "calib/avg_num_step_conf": 8.4140625, "calib/ece": 0.4011418326693227, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0035152654305308367, "calib/mean_conf": 0.10483426294820719, "calib/mu_c": 0.10309763779527562, "calib/mu_w": 0.10661290322580645, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04087527844312951, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 490.609375, "completions/mean_terminated_length": 494.4724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.11626666666666667, "grad_norm": 0.020067334175109863, "learning_rate": 2.5277777777777778e-06, "loss": 0.0151, "num_tokens": 23548831.0, "reward": 1.1201258897781372, "reward_std": 0.1723223328590393, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5742534399032593, "rewards/format_reward_step_strict": 0.9609375, "step": 109 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8512170134044048e-06, "aux_brier/mean_group_std": 0.0374497605813675, "aux_brier/mean_r": 0.9703611325446959, "aux_brier/n_active_tok": 232.125, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 58.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4751144164759725, "calib/avg_num_step_conf": 7.328125, "calib/ece": 0.3577777777777778, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006224256292906169, "calib/mean_conf": 0.09761904761904762, "calib/mu_c": 0.09421052631578948, "calib/mu_w": 0.10043478260869565, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015079365079365076, "calib/std_conf": 0.03454761084309574, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2616.0, "completions/max_terminated_length": 2616.0, "completions/mean_length": 445.58203125, "completions/mean_terminated_length": 447.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.11733333333333333, "grad_norm": 0.008129195310175419, "learning_rate": 2.5e-06, "loss": 0.0431, "num_tokens": 23767820.0, "reward": 1.0906033515930176, "reward_std": 0.27544230222702026, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6124132871627808, "rewards/format_reward_step_strict": 0.984375, "step": 110 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.165272429631825e-06, "aux_brier/mean_group_std": 0.060017062456761895, "aux_brier/mean_r": 0.9527771586354886, "aux_brier/n_active_tok": 257.75, "aux_brier/n_groups": 18.09375, "aux_brier/n_step_records": 64.4375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.3436869959677419, "calib/avg_num_step_conf": 8.0546875, "calib/ece": 0.4167857142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02026965725806451, "calib/mean_conf": 0.10583333333333333, "calib/mu_c": 0.09585937500000002, "calib/mu_w": 0.11612903225806454, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00734126984126984, "calib/std_conf": 0.04434375996258152, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2782.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 504.375, "completions/mean_terminated_length": 504.375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1184, "grad_norm": 0.007123699877411127, "learning_rate": 2.4722222222222226e-06, "loss": 0.076, "num_tokens": 24004348.0, "reward": 1.1340057849884033, "reward_std": 0.23329788446426392, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5672730207443237, "rewards/format_reward_step_strict": 0.984375, "step": 111 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.877805763805256e-07, "aux_brier/mean_group_std": 0.08182111997655978, "aux_brier/mean_r": 0.935912907754222, "aux_brier/n_active_tok": 246.0, "aux_brier/n_groups": 16.0625, "aux_brier/n_step_records": 61.5, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.47402510683760685, "calib/avg_num_step_conf": 7.91015625, "calib/ece": 0.4161224489795919, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004250133547008539, "calib/mean_conf": 0.10632653061224491, "calib/mu_c": 0.10429687500000001, "calib/mu_w": 0.10854700854700855, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.03697179716922796, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 525.2421875, "completions/mean_terminated_length": 537.8480224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.11946666666666667, "grad_norm": 0.009797528386116028, "learning_rate": 2.4444444444444447e-06, "loss": -0.0052, "num_tokens": 24246730.0, "reward": 1.1099687814712524, "reward_std": 0.2601466178894043, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5414378643035889, "rewards/format_reward_step_strict": 0.94921875, "step": 112 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.8922507378534874e-08, "aux_brier/mean_group_std": 0.07131799118428014, "aux_brier/mean_r": 0.9567913860997896, "aux_brier/n_active_tok": 249.5, "aux_brier/n_groups": 15.71875, "aux_brier/n_step_records": 62.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.536098554784491, "calib/avg_num_step_conf": 7.9921875, "calib/ece": 0.44539682539682535, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005447252817215226, "calib/mean_conf": 0.1061904761904762, "calib/mu_c": 0.10863309352517984, "calib/mu_w": 0.10318584070796462, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04030355340703298, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2731.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 451.68359375, "completions/mean_terminated_length": 453.4549255371094, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.12053333333333334, "grad_norm": 0.03129152953624725, "learning_rate": 2.4166666666666667e-06, "loss": 0.0157, "num_tokens": 24467561.0, "reward": 1.1718251705169678, "reward_std": 0.26482635736465454, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5466758012771606, "rewards/format_reward_step_strict": 0.984375, "step": 113 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.0491426657165412e-06, "aux_brier/mean_group_std": 0.03986719759002069, "aux_brier/mean_r": 0.9700618380518691, "aux_brier/n_active_tok": 254.875, "aux_brier/n_groups": 16.75, "aux_brier/n_step_records": 63.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49836076322864076, "calib/avg_num_step_conf": 8.68359375, "calib/ece": 0.5009126984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004325617992262845, "calib/mean_conf": 0.10107142857142858, "calib/mu_c": 0.09933774834437084, "calib/mu_w": 0.10366336633663369, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0013888888888888887, "calib/std_conf": 0.034469374161740174, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2687.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 451.46875, "completions/mean_terminated_length": 456.8221435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.1216, "grad_norm": 0.044061869382858276, "learning_rate": 2.388888888888889e-06, "loss": 0.0078, "num_tokens": 24688161.0, "reward": 1.2050158977508545, "reward_std": 0.209433913230896, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.49975115060806274, "rewards/format_reward_step_strict": 0.98046875, "step": 114 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.494877079274981e-06, "aux_brier/mean_group_std": 0.04775773662434711, "aux_brier/mean_r": 0.967077989380165, "aux_brier/n_active_tok": 239.25, "aux_brier/n_groups": 13.21875, "aux_brier/n_step_records": 59.8125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5518178410794602, "calib/avg_num_step_conf": 7.53125, "calib/ece": 0.43562992125984246, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017541229385307083, "calib/mean_conf": 0.10767716535433071, "calib/mu_c": 0.1084782608695652, "calib/mu_w": 0.1067241379310345, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.036978900536362214, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1323.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 447.9140625, "completions/mean_terminated_length": 449.6706237792969, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.12266666666666666, "grad_norm": 0.02731587551534176, "learning_rate": 2.361111111111111e-06, "loss": 0.0108, "num_tokens": 24908091.0, "reward": 1.1723220348358154, "reward_std": 0.26217222213745117, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5564754009246826, "rewards/format_reward_step_strict": 0.98828125, "step": 115 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.5188563396459926e-07, "aux_brier/mean_group_std": 0.032075458344831345, "aux_brier/mean_r": 0.9742124829395283, "aux_brier/n_active_tok": 253.375, "aux_brier/n_groups": 18.03125, "aux_brier/n_step_records": 63.34375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5113386742473188, "calib/avg_num_step_conf": 7.93359375, "calib/ece": 0.4633466135458168, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.008309859154929586, "calib/mean_conf": 0.11529880478087651, "calib/mu_c": 0.11169014084507044, "calib/mu_w": 0.12000000000000002, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006454183266932271, "calib/std_conf": 0.06931229028266792, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 525.6953125, "completions/mean_terminated_length": 529.8346557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.12373333333333333, "grad_norm": 0.006843527778983116, "learning_rate": 2.3333333333333336e-06, "loss": 0.0429, "num_tokens": 25147189.0, "reward": 1.1779075860977173, "reward_std": 0.20672045648097992, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5319429636001587, "rewards/format_reward_step_strict": 0.98046875, "step": 116 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.3370292073489054e-07, "aux_brier/mean_group_std": 0.03460002135801631, "aux_brier/mean_r": 0.973786716451061, "aux_brier/n_active_tok": 264.125, "aux_brier/n_groups": 15.25, "aux_brier/n_step_records": 66.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44812527294279114, "calib/avg_num_step_conf": 8.37109375, "calib/ece": 0.3456692913385827, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009207062199762917, "calib/mean_conf": 0.1188976377952756, "calib/mu_c": 0.11393162393162395, "calib/mu_w": 0.12313868613138687, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.04697508715604077, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1738.0, "completions/max_terminated_length": 1738.0, "completions/mean_length": 473.359375, "completions/mean_terminated_length": 475.2157287597656, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.1248, "grad_norm": 0.04809485003352165, "learning_rate": 2.305555555555556e-06, "loss": 0.0031, "num_tokens": 25374969.0, "reward": 1.1069035530090332, "reward_std": 0.25470712780952454, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6229265928268433, "rewards/format_reward_step_strict": 0.98828125, "step": 117 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5123708263509172e-07, "aux_brier/mean_group_std": 0.040068772319806684, "aux_brier/mean_r": 0.9707969090019554, "aux_brier/n_active_tok": 265.375, "aux_brier/n_groups": 15.25, "aux_brier/n_step_records": 66.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.3718217645563183, "calib/avg_num_step_conf": 8.41015625, "calib/ece": 0.4444047619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02241418764302064, "calib/mean_conf": 0.11869047619047621, "calib/mu_c": 0.10855072463768115, "calib/mu_w": 0.13096491228070178, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00773809523809524, "calib/std_conf": 0.04176026901286929, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2097.0, "completions/max_terminated_length": 2097.0, "completions/mean_length": 479.99609375, "completions/mean_terminated_length": 483.77557373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.12586666666666665, "grad_norm": 0.008244968950748444, "learning_rate": 2.277777777777778e-06, "loss": 0.02, "num_tokens": 25601856.0, "reward": 1.1679399013519287, "reward_std": 0.18270915746688843, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5467597246170044, "rewards/format_reward_step_strict": 0.984375, "step": 118 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 9.787899220015106e-07, "aux_brier/mean_group_std": 0.06009293258223417, "aux_brier/mean_r": 0.9545985269931022, "aux_brier/n_active_tok": 296.625, "aux_brier/n_groups": 20.0625, "aux_brier/n_step_records": 74.15625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4995509654243377, "calib/avg_num_step_conf": 9.26953125, "calib/ece": 0.40492, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007665020206555892, "calib/mean_conf": 0.12716, "calib/mu_c": 0.12351145038167939, "calib/mu_w": 0.13117647058823528, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00404, "calib/std_conf": 0.04804512878534097, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2787.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 570.80859375, "completions/mean_terminated_length": 570.80859375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.12693333333333334, "grad_norm": 0.0063473815098404884, "learning_rate": 2.25e-06, "loss": 0.1005, "num_tokens": 25853047.0, "reward": 1.1375501155853271, "reward_std": 0.2780386507511139, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5658254027366638, "rewards/format_reward_step_strict": 0.96875, "step": 119 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.697449643766618e-07, "aux_brier/mean_group_std": 0.07134942448460872, "aux_brier/mean_r": 0.9538399584539912, "aux_brier/n_active_tok": 260.5, "aux_brier/n_groups": 17.4375, "aux_brier/n_step_records": 65.125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.466510067114094, "calib/avg_num_step_conf": 8.140625, "calib/ece": 0.4835341365461847, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004579865771812103, "calib/mean_conf": 0.11485943775100402, "calib/mu_c": 0.1130201342281879, "calib/mu_w": 0.1176, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.034988241301939595, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2618.0, "completions/max_terminated_length": 2618.0, "completions/mean_length": 493.08984375, "completions/mean_terminated_length": 495.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.128, "grad_norm": 0.009857230819761753, "learning_rate": 2.222222222222222e-06, "loss": 0.0882, "num_tokens": 26085966.0, "reward": 1.1971681118011475, "reward_std": 0.28364866971969604, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.507422685623169, "rewards/format_reward_step_strict": 0.96875, "step": 120 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.9463558026687764e-07, "aux_brier/mean_group_std": 0.04828574374391937, "aux_brier/mean_r": 0.963195378810623, "aux_brier/n_active_tok": 265.875, "aux_brier/n_groups": 16.3125, "aux_brier/n_step_records": 66.46875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5067996077149395, "calib/avg_num_step_conf": 8.57421875, "calib/ece": 0.34354838709677415, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004456358287021872, "calib/mean_conf": 0.12612903225806454, "calib/mu_c": 0.12373913043478263, "calib/mu_w": 0.1281954887218045, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0029838709677419356, "calib/std_conf": 0.04520936381417936, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 549.9609375, "completions/mean_terminated_length": 556.4822387695312, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.12906666666666666, "grad_norm": 0.02184765599668026, "learning_rate": 2.1944444444444445e-06, "loss": 0.0568, "num_tokens": 26331812.0, "reward": 1.081617832183838, "reward_std": 0.28432029485702515, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6077219247817993, "rewards/format_reward_step_strict": 0.9609375, "step": 121 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.333253980666086e-07, "aux_brier/mean_group_std": 0.04972777550477182, "aux_brier/mean_r": 0.9600536819027194, "aux_brier/n_active_tok": 267.5, "aux_brier/n_groups": 18.34375, "aux_brier/n_step_records": 66.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.53106489403712, "calib/avg_num_step_conf": 8.6796875, "calib/ece": 0.453293172690763, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 6.778991707252768e-05, "calib/mean_conf": 0.11947791164658635, "calib/mu_c": 0.11950704225352111, "calib/mu_w": 0.11943925233644859, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012449799196787147, "calib/std_conf": 0.0331984496973194, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 481.359375, "completions/mean_terminated_length": 485.14959716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.13013333333333332, "grad_norm": 0.012736966833472252, "learning_rate": 2.166666666666667e-06, "loss": 0.0392, "num_tokens": 26562384.0, "reward": 1.1788194179534912, "reward_std": 0.24727436900138855, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5355902910232544, "rewards/format_reward_step_strict": 0.97265625, "step": 122 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -8.735640291690494e-07, "aux_brier/mean_group_std": 0.07085951931389238, "aux_brier/mean_r": 0.9459146764310977, "aux_brier/n_active_tok": 281.875, "aux_brier/n_groups": 18.0, "aux_brier/n_step_records": 70.46875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4719491035280509, "calib/avg_num_step_conf": 8.85546875, "calib/ece": 0.34584, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004656513077565705, "calib/mean_conf": 0.12752000000000002, "calib/mu_c": 0.12504273504273505, "calib/mu_w": 0.12969924812030076, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0026799999999999997, "calib/std_conf": 0.04530176155515368, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2819.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 558.65625, "completions/mean_terminated_length": 563.0551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.1312, "grad_norm": 0.02399447374045849, "learning_rate": 2.138888888888889e-06, "loss": -0.0337, "num_tokens": 26810688.0, "reward": 1.0906023979187012, "reward_std": 0.2960851788520813, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.604597270488739, "rewards/format_reward_step_strict": 0.96484375, "step": 123 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1318501275114912e-07, "aux_brier/mean_group_std": 0.059568114978127334, "aux_brier/mean_r": 0.9554816319526316, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4354725415070243, "calib/avg_num_step_conf": 8.34765625, "calib/ece": 0.4167330677290837, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016418263090676885, "calib/mean_conf": 0.12677290836653388, "calib/mu_c": 0.11918518518518519, "calib/mu_w": 0.13560344827586207, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002828685258964143, "calib/std_conf": 0.044564562403536456, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2628.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 505.5625, "completions/mean_terminated_length": 509.5433044433594, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.13226666666666667, "grad_norm": 0.05152898281812668, "learning_rate": 2.1111111111111114e-06, "loss": 0.0187, "num_tokens": 27046928.0, "reward": 1.1520527601242065, "reward_std": 0.19784283638000488, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5535234212875366, "rewards/format_reward_step_strict": 0.97265625, "step": 124 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.2310242481802263e-07, "aux_brier/mean_group_std": 0.06881672184727762, "aux_brier/mean_r": 0.9462462088289638, "aux_brier/n_active_tok": 270.75, "aux_brier/n_groups": 18.46875, "aux_brier/n_step_records": 67.6875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.43086204671570516, "calib/avg_num_step_conf": 8.55078125, "calib/ece": 0.39590240963855416, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01998019744483155, "calib/mean_conf": 0.12915783132530123, "calib/mu_c": 0.11928809523809523, "calib/mu_w": 0.1392682926829268, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00951807228915663, "calib/std_conf": 0.05250532530761328, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 546.67578125, "completions/mean_terminated_length": 548.8196411132812, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.13333333333333333, "grad_norm": 0.026436125859618187, "learning_rate": 2.0833333333333334e-06, "loss": 0.0631, "num_tokens": 27291685.0, "reward": 1.1183891296386719, "reward_std": 0.259613573551178, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5751187801361084, "rewards/format_reward_step_strict": 0.96484375, "step": 125 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.0745253512522979e-06, "aux_brier/mean_group_std": 0.06444103998597421, "aux_brier/mean_r": 0.9498170013024771, "aux_brier/n_active_tok": 279.5, "aux_brier/n_groups": 18.1875, "aux_brier/n_step_records": 69.875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5129921259842519, "calib/avg_num_step_conf": 8.78515625, "calib/ece": 0.3904858299595142, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.008677165354330746, "calib/mean_conf": 0.13153846153846152, "calib/mu_c": 0.12732283464566926, "calib/mu_w": 0.136, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003927125506072875, "calib/std_conf": 0.06646101364315203, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2899.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 535.74609375, "completions/mean_terminated_length": 539.9645385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.1344, "grad_norm": 0.02762473188340664, "learning_rate": 2.0555555555555555e-06, "loss": 0.0681, "num_tokens": 27534300.0, "reward": 1.113364577293396, "reward_std": 0.27454179525375366, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5628331899642944, "rewards/format_reward_step_strict": 0.953125, "step": 126 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.333506252476635e-06, "aux_brier/mean_group_std": 0.0493533218266499, "aux_brier/mean_r": 0.9629633992998795, "aux_brier/n_active_tok": 270.625, "aux_brier/n_groups": 16.28125, "aux_brier/n_step_records": 67.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.43862990702479343, "calib/avg_num_step_conf": 8.5234375, "calib/ece": 0.387710843373494, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007109374999999987, "calib/mean_conf": 0.12634538152610442, "calib/mu_c": 0.12289062500000002, "calib/mu_w": 0.13, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03911543503255671, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 523.8359375, "completions/mean_terminated_length": 525.8901977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.13546666666666668, "grad_norm": 0.007691785227507353, "learning_rate": 2.027777777777778e-06, "loss": 0.075, "num_tokens": 27772074.0, "reward": 1.1309611797332764, "reward_std": 0.24632525444030762, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5785320401191711, "rewards/format_reward_step_strict": 0.97265625, "step": 127 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.216938798678015e-07, "aux_brier/mean_group_std": 0.06190802379627205, "aux_brier/mean_r": 0.9577295168062887, "aux_brier/n_active_tok": 237.5, "aux_brier/n_groups": 15.5, "aux_brier/n_step_records": 59.375, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6191532258064515, "calib/avg_num_step_conf": 7.56640625, "calib/ece": 0.37377049180327876, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019284946236559145, "calib/mean_conf": 0.11803278688524592, "calib/mu_c": 0.12783333333333335, "calib/mu_w": 0.10854838709677421, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05938398724751983, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 515.67578125, "completions/mean_terminated_length": 523.8611450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.13653333333333334, "grad_norm": 0.03205028548836708, "learning_rate": 2.0000000000000003e-06, "loss": 0.0908, "num_tokens": 28010751.0, "reward": 1.087334156036377, "reward_std": 0.29081636667251587, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5837117433547974, "rewards/format_reward_step_strict": 0.9453125, "step": 128 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.2448030372324013e-06, "aux_brier/mean_group_std": 0.07537504328134718, "aux_brier/mean_r": 0.9467429778290359, "aux_brier/n_active_tok": 258.375, "aux_brier/n_groups": 18.375, "aux_brier/n_step_records": 64.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.589994098747623, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.47690476190476194, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005471116648088636, "calib/mean_conf": 0.12555555555555556, "calib/mu_c": 0.1277483443708609, "calib/mu_w": 0.12227722772277227, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0016269841269841267, "calib/std_conf": 0.04094238219268626, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2865.0, "completions/max_terminated_length": 2865.0, "completions/mean_length": 480.96484375, "completions/mean_terminated_length": 482.85101318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.1376, "grad_norm": 0.008966000750660896, "learning_rate": 1.9722222222222224e-06, "loss": 0.0879, "num_tokens": 28236262.0, "reward": 1.214047908782959, "reward_std": 0.2387232780456543, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5280663967132568, "rewards/format_reward_step_strict": 0.984375, "step": 129 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.5649928951931855e-06, "aux_brier/mean_group_std": 0.05652320397212803, "aux_brier/mean_r": 0.9549481129361472, "aux_brier/n_active_tok": 242.5, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 60.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4777851679414699, "calib/avg_num_step_conf": 7.578125, "calib/ece": 0.4907539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006510808114399719, "calib/mean_conf": 0.13011904761904763, "calib/mu_c": 0.12761290322580646, "calib/mu_w": 0.13412371134020618, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0028968253968253968, "calib/std_conf": 0.043365209364419705, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2437.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 464.40234375, "completions/mean_terminated_length": 464.40234375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.13866666666666666, "grad_norm": 0.0086931511759758, "learning_rate": 1.944444444444445e-06, "loss": 0.0319, "num_tokens": 28460437.0, "reward": 1.2212738990783691, "reward_std": 0.18101811408996582, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5100957155227661, "rewards/format_reward_step_strict": 0.9765625, "step": 130 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.616686539114177e-07, "aux_brier/mean_group_std": 0.05609984517634906, "aux_brier/mean_r": 0.9611126952734949, "aux_brier/n_active_tok": 234.75, "aux_brier/n_groups": 13.34375, "aux_brier/n_step_records": 58.6875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5641874681609782, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.2830980392156862, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011147478349465098, "calib/mean_conf": 0.1247450980392157, "calib/mu_c": 0.13134615384615383, "calib/mu_w": 0.12019867549668874, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.042202055120135286, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 440.3359375, "completions/mean_terminated_length": 442.0627746582031, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.13973333333333332, "grad_norm": 0.012002948671579361, "learning_rate": 1.916666666666667e-06, "loss": 0.0208, "num_tokens": 28679371.0, "reward": 1.0712454319000244, "reward_std": 0.20326244831085205, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6756066083908081, "rewards/format_reward_step_strict": 0.9921875, "step": 131 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.3460213122161733e-07, "aux_brier/mean_group_std": 0.09031914227052233, "aux_brier/mean_r": 0.9348492362155827, "aux_brier/n_active_tok": 263.125, "aux_brier/n_groups": 15.375, "aux_brier/n_step_records": 65.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4654584491033089, "calib/avg_num_step_conf": 8.2890625, "calib/ece": 0.43913725490196087, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0034737307400858897, "calib/mean_conf": 0.14125490196078433, "calib/mu_c": 0.1397972972972973, "calib/mu_w": 0.1432710280373832, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05363633650444295, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 493.8046875, "completions/mean_terminated_length": 495.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.1408, "grad_norm": 0.05939352884888649, "learning_rate": 1.888888888888889e-06, "loss": -0.0142, "num_tokens": 28911377.0, "reward": 1.2067489624023438, "reward_std": 0.2673734426498413, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5457457304000854, "rewards/format_reward_step_strict": 0.984375, "step": 132 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0812276646232633e-06, "aux_brier/mean_group_std": 0.05434612503982542, "aux_brier/mean_r": 0.9616093050296004, "aux_brier/n_active_tok": 285.25, "aux_brier/n_groups": 16.90625, "aux_brier/n_step_records": 71.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.45672076570952974, "calib/avg_num_step_conf": 9.12109375, "calib/ece": 0.21940239043824702, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010070744902205608, "calib/mean_conf": 0.14346613545816736, "calib/mu_c": 0.13696629213483144, "calib/mu_w": 0.14703703703703705, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004143426294820717, "calib/std_conf": 0.05152976404250724, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2577.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 579.0390625, "completions/mean_terminated_length": 581.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.14186666666666667, "grad_norm": 0.02066822350025177, "learning_rate": 1.8611111111111113e-06, "loss": 0.0487, "num_tokens": 29165955.0, "reward": 1.0142062902450562, "reward_std": 0.2862761914730072, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.7052628993988037, "rewards/format_reward_step_strict": 0.98046875, "step": 133 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.850857843827441e-07, "aux_brier/mean_group_std": 0.053333546563048284, "aux_brier/mean_r": 0.9595509305591835, "aux_brier/n_active_tok": 279.125, "aux_brier/n_groups": 16.96875, "aux_brier/n_step_records": 69.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5063343519225871, "calib/avg_num_step_conf": 8.91015625, "calib/ece": 0.33087649402390445, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00042653425006375367, "calib/mean_conf": 0.14641434262948208, "calib/mu_c": 0.14663865546218494, "calib/mu_w": 0.14621212121212118, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015936254980079682, "calib/std_conf": 0.0563912382066595, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 564.0625, "completions/mean_terminated_length": 568.50390625, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.14293333333333333, "grad_norm": 0.03287725895643234, "learning_rate": 1.8333333333333333e-06, "loss": -0.0112, "num_tokens": 29419307.0, "reward": 1.1120322942733765, "reward_std": 0.29269343614578247, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6278167963027954, "rewards/format_reward_step_strict": 0.98046875, "step": 134 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.5976514672620965e-07, "aux_brier/mean_group_std": 0.027995954789728373, "aux_brier/mean_r": 0.9751566209296453, "aux_brier/n_active_tok": 270.375, "aux_brier/n_groups": 17.03125, "aux_brier/n_step_records": 67.59375, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5068783068783069, "calib/avg_num_step_conf": 8.72265625, "calib/ece": 0.38109756097560976, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0036071428571428366, "calib/mean_conf": 0.13109756097560976, "calib/mu_c": 0.13285714285714284, "calib/mu_w": 0.12925, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04556794527217112, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 534.2265625, "completions/mean_terminated_length": 540.561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.144, "grad_norm": 0.026167117059230804, "learning_rate": 1.8055555555555557e-06, "loss": 0.0611, "num_tokens": 29661949.0, "reward": 1.115006685256958, "reward_std": 0.31411486864089966, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5772144794464111, "rewards/format_reward_step_strict": 0.95703125, "step": 135 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.419795309826505e-07, "aux_brier/mean_group_std": 0.03866174836164761, "aux_brier/mean_r": 0.9682558963961708, "aux_brier/n_active_tok": 283.0, "aux_brier/n_groups": 17.8125, "aux_brier/n_step_records": 70.75, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5845853658536586, "calib/avg_num_step_conf": 9.01171875, "calib/ece": 0.35963709677419353, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011139512195121881, "calib/mean_conf": 0.1443951612903226, "calib/mu_c": 0.14991999999999997, "calib/mu_w": 0.1387804878048781, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05304856547699979, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 515.52734375, "completions/mean_terminated_length": 521.6403198242188, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.14506666666666668, "grad_norm": 0.09230382740497589, "learning_rate": 1.777777777777778e-06, "loss": 0.0336, "num_tokens": 29902412.0, "reward": 1.1248645782470703, "reward_std": 0.2411046326160431, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6010206937789917, "rewards/format_reward_step_strict": 0.96484375, "step": 136 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.368074245948318e-07, "aux_brier/mean_group_std": 0.06382341056104424, "aux_brier/mean_r": 0.9450567836904806, "aux_brier/n_active_tok": 277.25, "aux_brier/n_groups": 17.03125, "aux_brier/n_step_records": 69.3125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.537962962962963, "calib/avg_num_step_conf": 8.99609375, "calib/ece": 0.343739837398374, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004063492063492075, "calib/mean_conf": 0.14658536585365856, "calib/mu_c": 0.1486666666666667, "calib/mu_w": 0.14460317460317462, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012601626016260164, "calib/std_conf": 0.04126711063290114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 491.6484375, "completions/mean_terminated_length": 499.4524230957031, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.14613333333333334, "grad_norm": 0.07573703676462173, "learning_rate": 1.75e-06, "loss": 0.0318, "num_tokens": 30135258.0, "reward": 1.1015383005142212, "reward_std": 0.272910475730896, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.60927814245224, "rewards/format_reward_step_strict": 0.9609375, "step": 137 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.365847257019716e-07, "aux_brier/mean_group_std": 0.03673627551638543, "aux_brier/mean_r": 0.9593570070056029, "aux_brier/n_active_tok": 288.125, "aux_brier/n_groups": 20.25, "aux_brier/n_step_records": 72.03125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5379941284904759, "calib/avg_num_step_conf": 9.21875, "calib/ece": 0.46395161290322584, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005709018911722569, "calib/mean_conf": 0.14491935483870966, "calib/mu_c": 0.14715231788079472, "calib/mu_w": 0.14144329896907215, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04999993496353731, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2899.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 521.3984375, "completions/mean_terminated_length": 523.4431762695312, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1472, "grad_norm": 0.009818018414080143, "learning_rate": 1.7222222222222224e-06, "loss": 0.0436, "num_tokens": 30373072.0, "reward": 1.2066519260406494, "reward_std": 0.2799428701400757, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5297328233718872, "rewards/format_reward_step_strict": 0.96875, "step": 138 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.0452663158395694e-08, "aux_brier/mean_group_std": 0.05374665811390989, "aux_brier/mean_r": 0.9614916759713343, "aux_brier/n_active_tok": 250.375, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 62.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5967126124377143, "calib/avg_num_step_conf": 7.82421875, "calib/ece": 0.4651181102362204, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01449168446256388, "calib/mean_conf": 0.13724409448818897, "calib/mu_c": 0.1430065359477124, "calib/mu_w": 0.12851485148514852, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04110055204630173, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2799.0, "completions/max_terminated_length": 2799.0, "completions/mean_length": 455.1171875, "completions/mean_terminated_length": 455.1171875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.14826666666666666, "grad_norm": 0.008437794633209705, "learning_rate": 1.6944444444444446e-06, "loss": 0.0459, "num_tokens": 30592678.0, "reward": 1.2212949991226196, "reward_std": 0.2543236017227173, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.5336176156997681, "rewards/format_reward_step_strict": 0.98046875, "step": 139 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9473522832047507e-06, "aux_brier/mean_group_std": 0.042736197051407236, "aux_brier/mean_r": 0.9649691936762791, "aux_brier/n_active_tok": 235.125, "aux_brier/n_groups": 12.90625, "aux_brier/n_step_records": 58.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5880708929788684, "calib/avg_num_step_conf": 7.34765625, "calib/ece": 0.5006719367588933, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018517382413087924, "calib/mean_conf": 0.14359683794466402, "calib/mu_c": 0.15018404907975458, "calib/mu_w": 0.13166666666666665, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05844584255308957, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 492.02734375, "completions/mean_terminated_length": 492.02734375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.14933333333333335, "grad_norm": 0.007128539029508829, "learning_rate": 1.6666666666666667e-06, "loss": 0.0045, "num_tokens": 30823653.0, "reward": 1.2577022314071655, "reward_std": 0.23854640126228333, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.5151835680007935, "rewards/format_reward_step_strict": 0.984375, "step": 140 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.102467689963031e-07, "aux_brier/mean_group_std": 0.06433482313202449, "aux_brier/mean_r": 0.9463781406931842, "aux_brier/n_active_tok": 245.0, "aux_brier/n_groups": 13.875, "aux_brier/n_step_records": 61.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5789543524416136, "calib/avg_num_step_conf": 7.66015625, "calib/ece": 0.4796442687747035, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004854697452229301, "calib/mean_conf": 0.14707509881422925, "calib/mu_c": 0.1489171974522293, "calib/mu_w": 0.1440625, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0030830039525691698, "calib/std_conf": 0.06790005364555529, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1896.0, "completions/max_terminated_length": 1896.0, "completions/mean_length": 504.4765625, "completions/mean_terminated_length": 506.4549255371094, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.1504, "grad_norm": 0.013903039507567883, "learning_rate": 1.638888888888889e-06, "loss": -0.0088, "num_tokens": 31059895.0, "reward": 1.2403523921966553, "reward_std": 0.23382900655269623, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5317223072052002, "rewards/format_reward_step_strict": 0.98828125, "step": 141 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0547654013190466e-06, "aux_brier/mean_group_std": 0.05228157119585481, "aux_brier/mean_r": 0.9612131276230842, "aux_brier/n_active_tok": 253.75, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 63.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5245987654320988, "calib/avg_num_step_conf": 7.953125, "calib/ece": 0.33168627450980387, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0026759259259259427, "calib/mean_conf": 0.14666666666666667, "calib/mu_c": 0.14808333333333332, "calib/mu_w": 0.14540740740740737, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003882352941176471, "calib/std_conf": 0.0520507545166411, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 497.546875, "completions/mean_terminated_length": 499.4980773925781, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.15146666666666667, "grad_norm": 0.014494812116026878, "learning_rate": 1.6111111111111113e-06, "loss": 0.035, "num_tokens": 31292427.0, "reward": 1.1244139671325684, "reward_std": 0.20455797016620636, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6382808685302734, "rewards/format_reward_step_strict": 0.9921875, "step": 142 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 6.215460009867391e-07, "aux_brier/mean_group_std": 0.04666321619680647, "aux_brier/mean_r": 0.9616438497821584, "aux_brier/n_active_tok": 267.25, "aux_brier/n_groups": 17.0, "aux_brier/n_step_records": 66.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5603998158742685, "calib/avg_num_step_conf": 8.49609375, "calib/ece": 0.4161290322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0043348457946998376, "calib/mean_conf": 0.1403225806451613, "calib/mu_c": 0.14226277372262774, "calib/mu_w": 0.1379279279279279, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0020161290322580645, "calib/std_conf": 0.05181286868223189, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 533.7109375, "completions/mean_terminated_length": 535.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.15253333333333333, "grad_norm": 0.012073071673512459, "learning_rate": 1.5833333333333333e-06, "loss": 0.0206, "num_tokens": 31536393.0, "reward": 1.1518467664718628, "reward_std": 0.21789219975471497, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5605117082595825, "rewards/format_reward_step_strict": 0.953125, "step": 143 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.017924849440078e-08, "aux_brier/mean_group_std": 0.02846353088420098, "aux_brier/mean_r": 0.9751172320338265, "aux_brier/n_active_tok": 264.75, "aux_brier/n_groups": 15.4375, "aux_brier/n_step_records": 66.1875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5559006211180124, "calib/avg_num_step_conf": 8.3671875, "calib/ece": 0.49158730158730146, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007300525561395127, "calib/mean_conf": 0.1473015873015873, "calib/mu_c": 0.14993788819875778, "calib/mu_w": 0.14263736263736265, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04959218925975856, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 507.02734375, "completions/mean_terminated_length": 509.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.1536, "grad_norm": 0.061257150024175644, "learning_rate": 1.5555555555555558e-06, "loss": 0.0094, "num_tokens": 31770320.0, "reward": 1.249025821685791, "reward_std": 0.22299563884735107, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.5195406675338745, "rewards/format_reward_step_strict": 0.98046875, "step": 144 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.059240721495991e-06, "aux_brier/mean_group_std": 0.06043944022073585, "aux_brier/mean_r": 0.9429568436991247, "aux_brier/n_active_tok": 275.5, "aux_brier/n_groups": 17.53125, "aux_brier/n_step_records": 68.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4821592271348987, "calib/avg_num_step_conf": 8.61328125, "calib/ece": 0.48932000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0031175516536210146, "calib/mean_conf": 0.16268000000000002, "calib/mu_c": 0.16159509202453987, "calib/mu_w": 0.16471264367816088, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05991007928554259, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2742.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 514.234375, "completions/mean_terminated_length": 514.234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.15466666666666667, "grad_norm": 0.007433212827891111, "learning_rate": 1.527777777777778e-06, "loss": 0.0847, "num_tokens": 32004668.0, "reward": 1.2540688514709473, "reward_std": 0.26964327692985535, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.5162754058837891, "rewards/format_reward_step_strict": 0.9765625, "step": 145 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.627102511231776e-07, "aux_brier/mean_group_std": 0.027543769427134806, "aux_brier/mean_r": 0.9743984052820087, "aux_brier/n_active_tok": 253.125, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 63.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6105024927840462, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.26549800796812756, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014973104172133322, "calib/mean_conf": 0.14486055776892431, "calib/mu_c": 0.15368932038834954, "calib/mu_w": 0.13871621621621621, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04644622129085265, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 501.9375, "completions/mean_terminated_length": 503.9059143066406, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.15573333333333333, "grad_norm": 0.024848168715834618, "learning_rate": 1.5e-06, "loss": 0.0479, "num_tokens": 32240380.0, "reward": 1.0565755367279053, "reward_std": 0.24422100186347961, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6716148853302002, "rewards/format_reward_step_strict": 0.97265625, "step": 146 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.208359558639408e-07, "aux_brier/mean_group_std": 0.042396414436749914, "aux_brier/mean_r": 0.961728043766565, "aux_brier/n_active_tok": 262.25, "aux_brier/n_groups": 14.8125, "aux_brier/n_step_records": 65.5625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6199466056445461, "calib/avg_num_step_conf": 8.25, "calib/ece": 0.2928174603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018577421815408085, "calib/mean_conf": 0.15956349206349207, "calib/mu_c": 0.16973684210526316, "calib/mu_w": 0.15115942028985507, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0482574109830772, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 506.20703125, "completions/mean_terminated_length": 512.2095336914062, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.1568, "grad_norm": 0.01770801469683647, "learning_rate": 1.4722222222222225e-06, "loss": 0.0015, "num_tokens": 32473649.0, "reward": 1.0973799228668213, "reward_std": 0.2255202680826187, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6551449298858643, "rewards/format_reward_step_strict": 0.9765625, "step": 147 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9145170915280119e-07, "aux_brier/mean_group_std": 0.04804261957314319, "aux_brier/mean_r": 0.9610529469897078, "aux_brier/n_active_tok": 250.25, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 62.5625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6382622368061253, "calib/avg_num_step_conf": 7.828125, "calib/ece": 0.4787649402390438, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01874624008750342, "calib/mean_conf": 0.1547011952191235, "calib/mu_c": 0.16157232704402513, "calib/mu_w": 0.14282608695652171, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05005385902557692, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 496.4140625, "completions/mean_terminated_length": 498.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.15786666666666666, "grad_norm": 0.029627349227666855, "learning_rate": 1.4444444444444445e-06, "loss": 0.0217, "num_tokens": 32705843.0, "reward": 1.2390427589416504, "reward_std": 0.31594008207321167, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.5264832377433777, "rewards/format_reward_step_strict": 0.97265625, "step": 148 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.4705874335804197e-07, "aux_brier/mean_group_std": 0.05452732319415469, "aux_brier/mean_r": 0.9559296365957464, "aux_brier/n_active_tok": 282.625, "aux_brier/n_groups": 16.34375, "aux_brier/n_step_records": 70.65625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5492105933282403, "calib/avg_num_step_conf": 8.921875, "calib/ece": 0.3125498007968127, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008390628978864195, "calib/mean_conf": 0.16155378486055777, "calib/mu_c": 0.16596638655462181, "calib/mu_w": 0.15757575757575762, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.048650883463411516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 572.5, "completions/mean_terminated_length": 574.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.15893333333333334, "grad_norm": 0.01007721945643425, "learning_rate": 1.4166666666666667e-06, "loss": 0.0684, "num_tokens": 32956859.0, "reward": 1.1155809164047241, "reward_std": 0.25350266695022583, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6420113444328308, "rewards/format_reward_step_strict": 0.98046875, "step": 149 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.075938205838625e-07, "aux_brier/mean_group_std": 0.06084920416650598, "aux_brier/mean_r": 0.9450912648550661, "aux_brier/n_active_tok": 255.0, "aux_brier/n_groups": 16.03125, "aux_brier/n_step_records": 63.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5419854947102268, "calib/avg_num_step_conf": 8.390625, "calib/ece": 0.38699186991869916, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005154035531306195, "calib/mean_conf": 0.1595121951219512, "calib/mu_c": 0.16187969924812032, "calib/mu_w": 0.15672566371681412, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0029268292682926834, "calib/std_conf": 0.05698172091070668, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 478.140625, "completions/mean_terminated_length": 483.810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.16, "grad_norm": 0.04251770302653313, "learning_rate": 1.3888888888888892e-06, "loss": -0.0007, "num_tokens": 33184223.0, "reward": 1.1404184103012085, "reward_std": 0.24159850180149078, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5772988796234131, "rewards/format_reward_step_strict": 0.953125, "step": 150 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0426429335602672e-06, "aux_brier/mean_group_std": 0.060192081828735935, "aux_brier/mean_r": 0.9468632518120125, "aux_brier/n_active_tok": 281.375, "aux_brier/n_groups": 19.21875, "aux_brier/n_step_records": 70.34375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5349037823490378, "calib/avg_num_step_conf": 8.97265625, "calib/ece": 0.28433198380566804, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000797611147976135, "calib/mean_conf": 0.16101214574898787, "calib/mu_c": 0.1614545454545455, "calib/mu_w": 0.16065693430656935, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.059527388435393944, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 566.61328125, "completions/mean_terminated_length": 571.0748291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.16106666666666666, "grad_norm": 0.024479515850543976, "learning_rate": 1.3611111111111112e-06, "loss": 0.0408, "num_tokens": 33436300.0, "reward": 1.0677103996276855, "reward_std": 0.26120448112487793, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6380289196968079, "rewards/format_reward_step_strict": 0.95703125, "step": 151 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.760061240756009e-07, "aux_brier/mean_group_std": 0.03143787467963727, "aux_brier/mean_r": 0.972342106949101, "aux_brier/n_active_tok": 260.25, "aux_brier/n_groups": 15.46875, "aux_brier/n_step_records": 65.0625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5108595800524934, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.3569230769230769, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024251968503936905, "calib/mean_conf": 0.15724696356275306, "calib/mu_c": 0.1584251968503937, "calib/mu_w": 0.156, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04700451934610316, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2543.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 495.3984375, "completions/mean_terminated_length": 499.2992248535156, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.16213333333333332, "grad_norm": 0.02353908307850361, "learning_rate": 1.3333333333333334e-06, "loss": 0.0081, "num_tokens": 33668514.0, "reward": 1.1226801872253418, "reward_std": 0.33606499433517456, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5922832489013672, "rewards/format_reward_step_strict": 0.95703125, "step": 152 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.107078837607304e-08, "aux_brier/mean_group_std": 0.06549836477067371, "aux_brier/mean_r": 0.9423625538808893, "aux_brier/n_active_tok": 251.375, "aux_brier/n_groups": 14.28125, "aux_brier/n_step_records": 62.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4867467581998475, "calib/avg_num_step_conf": 7.9609375, "calib/ece": 0.3948809523809523, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008867276887871839, "calib/mean_conf": 0.1632142857142857, "calib/mu_c": 0.15920289855072464, "calib/mu_w": 0.16807017543859648, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005238095238095236, "calib/std_conf": 0.06122740686550364, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2310.0, "completions/max_terminated_length": 2310.0, "completions/mean_length": 514.62890625, "completions/mean_terminated_length": 516.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.1632, "grad_norm": 0.027405643835663795, "learning_rate": 1.3055555555555556e-06, "loss": 0.0451, "num_tokens": 33907579.0, "reward": 1.175753116607666, "reward_std": 0.24725493788719177, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5858250260353088, "rewards/format_reward_step_strict": 0.98046875, "step": 153 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.40159171024834e-06, "aux_brier/mean_group_std": 0.049404520346966, "aux_brier/mean_r": 0.9571009186646207, "aux_brier/n_active_tok": 245.375, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 61.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5975159554498811, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.32177865612648215, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015465523714178503, "calib/mean_conf": 0.16043478260869568, "calib/mu_c": 0.16844262295081971, "calib/mu_w": 0.1529770992366412, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0448472230430833, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 457.48046875, "completions/mean_terminated_length": 461.0826721191406, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.16426666666666667, "grad_norm": 0.029376503080129623, "learning_rate": 1.2777777777777779e-06, "loss": -0.0002, "num_tokens": 34129134.0, "reward": 1.1298280954360962, "reward_std": 0.2767148017883301, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6443125009536743, "rewards/format_reward_step_strict": 0.984375, "step": 154 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.8444394972449984e-07, "aux_brier/mean_group_std": 0.06876151737155475, "aux_brier/mean_r": 0.9484107735269428, "aux_brier/n_active_tok": 236.875, "aux_brier/n_groups": 12.6875, "aux_brier/n_step_records": 59.21875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6465105779153766, "calib/avg_num_step_conf": 7.46484375, "calib/ece": 0.3039599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032853457172342615, "calib/mean_conf": 0.15204, "calib/mu_c": 0.16991228070175438, "calib/mu_w": 0.13705882352941176, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06402060918173147, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1101.0, "completions/max_terminated_length": 1101.0, "completions/mean_length": 443.41796875, "completions/mean_terminated_length": 445.1568908691406, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.16533333333333333, "grad_norm": 0.012148459441959858, "learning_rate": 1.25e-06, "loss": -0.0043, "num_tokens": 34349865.0, "reward": 1.1015002727508545, "reward_std": 0.23070210218429565, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6560012102127075, "rewards/format_reward_step_strict": 0.9765625, "step": 155 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.743661991517655e-08, "aux_brier/mean_group_std": 0.04543403264020979, "aux_brier/mean_r": 0.9614790204493904, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 14.40625, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5598499061913697, "calib/avg_num_step_conf": 8.21484375, "calib/ece": 0.3351383399209486, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01092432770481555, "calib/mean_conf": 0.15869565217391307, "calib/mu_c": 0.16430894308943092, "calib/mu_w": 0.15338461538461537, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0038339920948616594, "calib/std_conf": 0.05606565762823203, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2092.0, "completions/max_terminated_length": 2092.0, "completions/mean_length": 496.07421875, "completions/mean_terminated_length": 499.9803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.1664, "grad_norm": 0.016778623685240746, "learning_rate": 1.2222222222222223e-06, "loss": -0.0275, "num_tokens": 34581620.0, "reward": 1.1311582326889038, "reward_std": 0.2194337248802185, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6340078115463257, "rewards/format_reward_step_strict": 0.984375, "step": 156 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2148936446298997e-06, "aux_brier/mean_group_std": 0.07013445319637046, "aux_brier/mean_r": 0.9377018502358946, "aux_brier/n_active_tok": 285.875, "aux_brier/n_groups": 18.96875, "aux_brier/n_step_records": 71.46875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5923857216230097, "calib/avg_num_step_conf": 9.10546875, "calib/ece": 0.36272, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.016565228556753964, "calib/mean_conf": 0.17256, "calib/mu_c": 0.1803787878787879, "calib/mu_w": 0.16381355932203392, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0036399999999999996, "calib/std_conf": 0.08917312599656917, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2753.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 517.30078125, "completions/mean_terminated_length": 521.3740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.16746666666666668, "grad_norm": 0.012949691154062748, "learning_rate": 1.1944444444444446e-06, "loss": 0.0313, "num_tokens": 34817777.0, "reward": 1.1535099744796753, "reward_std": 0.25031453371047974, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6062273979187012, "rewards/format_reward_step_strict": 0.97265625, "step": 157 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.4456085095648774e-07, "aux_brier/mean_group_std": 0.049263813028915936, "aux_brier/mean_r": 0.9545683363001688, "aux_brier/n_active_tok": 250.375, "aux_brier/n_groups": 13.6875, "aux_brier/n_step_records": 62.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5115191017036654, "calib/avg_num_step_conf": 8.00390625, "calib/ece": 0.4301185770750989, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010886680433660756, "calib/mean_conf": 0.15881422924901187, "calib/mu_c": 0.15926174496644296, "calib/mu_w": 0.1581730769230769, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05077440879067236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 466.625, "completions/mean_terminated_length": 468.4549255371094, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.16853333333333334, "grad_norm": 0.01752115599811077, "learning_rate": 1.1666666666666668e-06, "loss": 0.0593, "num_tokens": 35042473.0, "reward": 1.2172133922576904, "reward_std": 0.2381628155708313, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5641664266586304, "rewards/format_reward_step_strict": 0.98828125, "step": 158 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.8741938578870077e-07, "aux_brier/mean_group_std": 0.0356345215848251, "aux_brier/mean_r": 0.9655458395128108, "aux_brier/n_active_tok": 270.25, "aux_brier/n_groups": 17.3125, "aux_brier/n_step_records": 67.5625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4752791068580542, "calib/avg_num_step_conf": 8.69140625, "calib/ece": 0.39800813008130076, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007448165869218509, "calib/mean_conf": 0.1445121951219512, "calib/mu_c": 0.14106060606060605, "calib/mu_w": 0.14850877192982456, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0029674796747967483, "calib/std_conf": 0.05210571896481207, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 508.8125, "completions/mean_terminated_length": 510.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.1696, "grad_norm": 0.017778778448700905, "learning_rate": 1.138888888888889e-06, "loss": 0.1077, "num_tokens": 35277513.0, "reward": 1.1381198167800903, "reward_std": 0.29481813311576843, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5681042671203613, "rewards/format_reward_step_strict": 0.9609375, "step": 159 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1538541261634236e-07, "aux_brier/mean_group_std": 0.03308633600277228, "aux_brier/mean_r": 0.9712904034975596, "aux_brier/n_active_tok": 282.5, "aux_brier/n_groups": 16.03125, "aux_brier/n_step_records": 70.625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5847333333333333, "calib/avg_num_step_conf": 9.5625, "calib/ece": 0.36269387755102034, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009526666666666628, "calib/mean_conf": 0.1586938775510204, "calib/mu_c": 0.16335999999999998, "calib/mu_w": 0.15383333333333335, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005591836734693879, "calib/std_conf": 0.05835786993134501, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2456.0, "completions/max_terminated_length": 2456.0, "completions/mean_length": 486.5390625, "completions/mean_terminated_length": 498.2160339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.17066666666666666, "grad_norm": 0.0668088048696518, "learning_rate": 1.111111111111111e-06, "loss": -0.0019, "num_tokens": 35506907.0, "reward": 1.1119287014007568, "reward_std": 0.29088109731674194, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5961523652076721, "rewards/format_reward_step_strict": 0.94921875, "step": 160 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.257408504042658e-07, "aux_brier/mean_group_std": 0.060142646147707056, "aux_brier/mean_r": 0.9541991361035498, "aux_brier/n_active_tok": 252.25, "aux_brier/n_groups": 14.59375, "aux_brier/n_step_records": 63.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5834820101100209, "calib/avg_num_step_conf": 7.9921875, "calib/ece": 0.5429249011857709, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009742789176330674, "calib/mean_conf": 0.16023715415019762, "calib/mu_c": 0.16316384180790963, "calib/mu_w": 0.15342105263157896, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0017786561264822134, "calib/std_conf": 0.05992042716721184, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2286.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 474.49609375, "completions/mean_terminated_length": 476.3569030761719, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.17173333333333332, "grad_norm": 0.012965511530637741, "learning_rate": 1.0833333333333335e-06, "loss": 0.0217, "num_tokens": 35732298.0, "reward": 1.3089410066604614, "reward_std": 0.20046359300613403, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.4935765564441681, "rewards/format_reward_step_strict": 0.98828125, "step": 161 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.7168893418073594e-07, "aux_brier/mean_group_std": 0.04964035382205659, "aux_brier/mean_r": 0.9612997269895767, "aux_brier/n_active_tok": 244.25, "aux_brier/n_groups": 12.75, "aux_brier/n_step_records": 61.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49456446475557403, "calib/avg_num_step_conf": 7.9375, "calib/ece": 0.5091304347826087, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004753496745603103, "calib/mean_conf": 0.14699604743083003, "calib/mu_c": 0.14536144578313254, "calib/mu_w": 0.15011494252873564, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04871133801057367, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2237.0, "completions/max_terminated_length": 2237.0, "completions/mean_length": 468.48046875, "completions/mean_terminated_length": 470.31768798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.1728, "grad_norm": 0.034487757831811905, "learning_rate": 1.0555555555555557e-06, "loss": -0.0076, "num_tokens": 35956373.0, "reward": 1.2667899131774902, "reward_std": 0.23064537346363068, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.5046597719192505, "rewards/format_reward_step_strict": 0.984375, "step": 162 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.471214591905692e-07, "aux_brier/mean_group_std": 0.05555076010570242, "aux_brier/mean_r": 0.9525882588155883, "aux_brier/n_active_tok": 308.0, "aux_brier/n_groups": 22.03125, "aux_brier/n_step_records": 77.0, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.518890881147541, "calib/avg_num_step_conf": 9.6484375, "calib/ece": 0.35147999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0071426741803278515, "calib/mean_conf": 0.16627999999999998, "calib/mu_c": 0.169765625, "calib/mu_w": 0.16262295081967215, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0028799999999999997, "calib/std_conf": 0.06980087105473684, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 577.58984375, "completions/mean_terminated_length": 579.8549194335938, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.17386666666666667, "grad_norm": 0.08797299116849899, "learning_rate": 1.0277777777777777e-06, "loss": 0.1231, "num_tokens": 36209068.0, "reward": 1.1419235467910767, "reward_std": 0.2853378355503082, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6145691871643066, "rewards/format_reward_step_strict": 0.9765625, "step": 163 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.9866855716088594e-07, "aux_brier/mean_group_std": 0.03667347612396784, "aux_brier/mean_r": 0.9707665289826557, "aux_brier/n_active_tok": 275.25, "aux_brier/n_groups": 15.625, "aux_brier/n_step_records": 68.8125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5839753466872111, "calib/avg_num_step_conf": 8.73046875, "calib/ece": 0.3771599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012505136106830989, "calib/mean_conf": 0.15084, "calib/mu_c": 0.15674242424242424, "calib/mu_w": 0.14423728813559325, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05185069334155523, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2610.0, "completions/max_terminated_length": 2610.0, "completions/mean_length": 547.1640625, "completions/mean_terminated_length": 551.472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.17493333333333333, "grad_norm": 0.057448893785476685, "learning_rate": 1.0000000000000002e-06, "loss": 0.0145, "num_tokens": 36455278.0, "reward": 1.1483054161071777, "reward_std": 0.26751455664634705, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5932214856147766, "rewards/format_reward_step_strict": 0.96875, "step": 164 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2906746038598982e-07, "aux_brier/mean_group_std": 0.03551182583994825, "aux_brier/mean_r": 0.9717435124993495, "aux_brier/n_active_tok": 271.5, "aux_brier/n_groups": 16.6875, "aux_brier/n_step_records": 67.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5469710272168569, "calib/avg_num_step_conf": 8.51953125, "calib/ece": 0.3275494071146245, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.000681048538818535, "calib/mean_conf": 0.15972332015810276, "calib/mu_c": 0.1600840336134454, "calib/mu_w": 0.15940298507462686, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008458498023715413, "calib/std_conf": 0.07853602764329508, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1706.0, "completions/max_terminated_length": 1706.0, "completions/mean_length": 508.46875, "completions/mean_terminated_length": 510.4627685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.176, "grad_norm": 0.050327152013778687, "learning_rate": 9.722222222222224e-07, "loss": -0.0101, "num_tokens": 36691022.0, "reward": 1.123129963874817, "reward_std": 0.2538028359413147, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6409574151039124, "rewards/format_reward_step_strict": 0.98828125, "step": 165 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.193520997120004e-08, "aux_brier/mean_group_std": 0.05831487297588019, "aux_brier/mean_r": 0.9546354778008198, "aux_brier/n_active_tok": 282.5, "aux_brier/n_groups": 17.5, "aux_brier/n_step_records": 70.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5058467741935484, "calib/avg_num_step_conf": 8.90234375, "calib/ece": 0.4602390438247012, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003360215053763438, "calib/mean_conf": 0.15729083665338647, "calib/mu_c": 0.15741935483870967, "calib/mu_w": 0.15708333333333332, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.052162700146282756, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 551.71484375, "completions/mean_terminated_length": 553.8784790039062, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.17706666666666668, "grad_norm": 0.03957146778702736, "learning_rate": 9.444444444444445e-07, "loss": 0.0656, "num_tokens": 36938445.0, "reward": 1.2245346307754517, "reward_std": 0.27831125259399414, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5309507846832275, "rewards/format_reward_step_strict": 0.97265625, "step": 166 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.390739093695917e-07, "aux_brier/mean_group_std": 0.058610101023658656, "aux_brier/mean_r": 0.9450262725855368, "aux_brier/n_active_tok": 281.75, "aux_brier/n_groups": 19.03125, "aux_brier/n_step_records": 70.4375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6231026785714285, "calib/avg_num_step_conf": 8.90234375, "calib/ece": 0.5113709677419356, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01963095238095236, "calib/mean_conf": 0.1660483870967742, "calib/mu_c": 0.17238095238095238, "calib/mu_w": 0.15275000000000002, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.061445209640143016, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 531.41015625, "completions/mean_terminated_length": 539.8452758789062, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.17813333333333334, "grad_norm": 0.06966294348239899, "learning_rate": 9.166666666666666e-07, "loss": -0.018, "num_tokens": 37180094.0, "reward": 1.259904146194458, "reward_std": 0.23908957839012146, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.5005543231964111, "rewards/format_reward_step_strict": 0.95703125, "step": 167 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.1016560679265606e-08, "aux_brier/mean_group_std": 0.07045650564183857, "aux_brier/mean_r": 0.9434491784728122, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.42066115702479334, "calib/avg_num_step_conf": 8.31640625, "calib/ece": 0.4157312252964427, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015405594405594386, "calib/mean_conf": 0.15501976284584978, "calib/mu_c": 0.14832167832167834, "calib/mu_w": 0.16372727272727272, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0027667984189723317, "calib/std_conf": 0.046127718433160075, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 534.515625, "completions/mean_terminated_length": 536.61181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.1792, "grad_norm": 0.0411788746714592, "learning_rate": 8.88888888888889e-07, "loss": 0.0105, "num_tokens": 37421602.0, "reward": 1.1951189041137695, "reward_std": 0.2917238175868988, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5695382952690125, "rewards/format_reward_step_strict": 0.98828125, "step": 168 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.2797146142773386e-07, "aux_brier/mean_group_std": 0.0398169424234487, "aux_brier/mean_r": 0.9645761018675307, "aux_brier/n_active_tok": 259.125, "aux_brier/n_groups": 14.90625, "aux_brier/n_step_records": 64.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5461334845906598, "calib/avg_num_step_conf": 8.22265625, "calib/ece": 0.3644444444444445, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007585554925316679, "calib/mean_conf": 0.14746031746031749, "calib/mu_c": 0.1511627906976744, "calib/mu_w": 0.14357723577235773, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05206736338684675, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 527.26953125, "completions/mean_terminated_length": 529.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.18026666666666666, "grad_norm": 0.024509698152542114, "learning_rate": 8.611111111111112e-07, "loss": 0.0458, "num_tokens": 37660767.0, "reward": 1.1482784748077393, "reward_std": 0.23623031377792358, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6087390780448914, "rewards/format_reward_step_strict": 0.984375, "step": 169 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.465195546729753e-07, "aux_brier/mean_group_std": 0.04840349667762759, "aux_brier/mean_r": 0.9621910303766671, "aux_brier/n_active_tok": 280.25, "aux_brier/n_groups": 14.96875, "aux_brier/n_step_records": 70.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5712414542201776, "calib/avg_num_step_conf": 8.7578125, "calib/ece": 0.39841269841269844, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010841479777649987, "calib/mean_conf": 0.1611111111111111, "calib/mu_c": 0.16588652482269503, "calib/mu_w": 0.15504504504504504, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05118007769370506, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2377.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 535.265625, "completions/mean_terminated_length": 537.36474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.18133333333333335, "grad_norm": 0.056401584297418594, "learning_rate": 8.333333333333333e-07, "loss": 0.0319, "num_tokens": 37901947.0, "reward": 1.1900184154510498, "reward_std": 0.23777510225772858, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5881984233856201, "rewards/format_reward_step_strict": 0.984375, "step": 170 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.491154891786195e-07, "aux_brier/mean_group_std": 0.06174421707246917, "aux_brier/mean_r": 0.9496703379452144, "aux_brier/n_active_tok": 265.0, "aux_brier/n_groups": 14.125, "aux_brier/n_step_records": 66.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5812636165577342, "calib/avg_num_step_conf": 8.38671875, "calib/ece": 0.31582677165354334, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0152007469654529, "calib/mean_conf": 0.1526771653543307, "calib/mu_c": 0.16075630252100845, "calib/mu_w": 0.14555555555555555, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05233069918321514, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 494.9375, "completions/mean_terminated_length": 496.8784484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.1824, "grad_norm": 0.04185402765870094, "learning_rate": 8.055555555555557e-07, "loss": -0.0002, "num_tokens": 38135547.0, "reward": 1.1236753463745117, "reward_std": 0.27767056226730347, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6509515047073364, "rewards/format_reward_step_strict": 0.9921875, "step": 171 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.3899531909641354e-07, "aux_brier/mean_group_std": 0.05103582146149051, "aux_brier/mean_r": 0.9617057692948487, "aux_brier/n_active_tok": 267.125, "aux_brier/n_groups": 15.5625, "aux_brier/n_step_records": 66.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5143003064351379, "calib/avg_num_step_conf": 8.44921875, "calib/ece": 0.5061417322834646, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0026830098740211195, "calib/mean_conf": 0.14881889763779527, "calib/mu_c": 0.14787878787878786, "calib/mu_w": 0.15056179775280898, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0026771653543307085, "calib/std_conf": 0.05601763183097044, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1908.0, "completions/max_terminated_length": 1908.0, "completions/mean_length": 495.859375, "completions/mean_terminated_length": 497.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.18346666666666667, "grad_norm": 0.0242488831281662, "learning_rate": 7.777777777777779e-07, "loss": -0.0376, "num_tokens": 38365839.0, "reward": 1.2689235210418701, "reward_std": 0.22546443343162537, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.5131937265396118, "rewards/format_reward_step_strict": 0.9921875, "step": 172 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.995026681164916e-07, "aux_brier/mean_group_std": 0.059560432252318406, "aux_brier/mean_r": 0.9545917724846392, "aux_brier/n_active_tok": 280.75, "aux_brier/n_groups": 18.46875, "aux_brier/n_step_records": 70.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6645595023078467, "calib/avg_num_step_conf": 8.7734375, "calib/ece": 0.44155999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.03430931834905343, "calib/mean_conf": 0.16244, "calib/mu_c": 0.17602649006622514, "calib/mu_w": 0.1417171717171717, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07389753987786062, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 537.3359375, "completions/mean_terminated_length": 539.4431762695312, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.18453333333333333, "grad_norm": 0.057891733944416046, "learning_rate": 7.5e-07, "loss": 0.0231, "num_tokens": 38606557.0, "reward": 1.222849726676941, "reward_std": 0.2555616497993469, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5632737874984741, "rewards/format_reward_step_strict": 0.9765625, "step": 173 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 4.628371158554856e-07, "aux_brier/mean_group_std": 0.06359103303081172, "aux_brier/mean_r": 0.9525573415400864, "aux_brier/n_active_tok": 277.0, "aux_brier/n_groups": 15.96875, "aux_brier/n_step_records": 69.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5182238843813387, "calib/avg_num_step_conf": 8.65625, "calib/ece": 0.3131349206349206, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010547667342798994, "calib/mean_conf": 0.1528968253968254, "calib/mu_c": 0.15232758620689657, "calib/mu_w": 0.15338235294117647, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0028571428571428567, "calib/std_conf": 0.048810669105158994, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 565.14453125, "completions/mean_terminated_length": 567.36083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.1856, "grad_norm": 0.04420608654618263, "learning_rate": 7.222222222222222e-07, "loss": 0.0278, "num_tokens": 38855466.0, "reward": 1.102353811264038, "reward_std": 0.29684698581695557, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6359777450561523, "rewards/format_reward_step_strict": 0.97265625, "step": 174 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -9.875406990489388e-07, "aux_brier/mean_group_std": 0.05294872455716927, "aux_brier/mean_r": 0.9574670859832486, "aux_brier/n_active_tok": 265.875, "aux_brier/n_groups": 16.09375, "aux_brier/n_step_records": 66.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5250064699792961, "calib/avg_num_step_conf": 8.51953125, "calib/ece": 0.31059999999999993, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004482401656314722, "calib/mean_conf": 0.14324, "calib/mu_c": 0.1457142857142857, "calib/mu_w": 0.141231884057971, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00292, "calib/std_conf": 0.06464288359904748, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2578.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 533.72265625, "completions/mean_terminated_length": 540.0513916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.18666666666666668, "grad_norm": 0.07568599283695221, "learning_rate": 6.944444444444446e-07, "loss": -0.0386, "num_tokens": 39097923.0, "reward": 1.0815097093582153, "reward_std": 0.30057209730148315, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6385387182235718, "rewards/format_reward_step_strict": 0.96875, "step": 175 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1575440644895352e-07, "aux_brier/mean_group_std": 0.08753972842421873, "aux_brier/mean_r": 0.9381399282236724, "aux_brier/n_active_tok": 283.25, "aux_brier/n_groups": 17.0625, "aux_brier/n_step_records": 70.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5977443609022557, "calib/avg_num_step_conf": 8.8515625, "calib/ece": 0.32285714285714284, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013471915081822244, "calib/mean_conf": 0.15373015873015872, "calib/mu_c": 0.16084033613445384, "calib/mu_w": 0.1473684210526316, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0021825396825396826, "calib/std_conf": 0.05098605671494791, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 521.390625, "completions/mean_terminated_length": 521.390625, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.18773333333333334, "grad_norm": 0.054171666502952576, "learning_rate": 6.666666666666667e-07, "loss": 0.0236, "num_tokens": 39335463.0, "reward": 1.1178412437438965, "reward_std": 0.2490749955177307, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6432398557662964, "rewards/format_reward_step_strict": 0.984375, "step": 176 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.689186719504136e-07, "aux_brier/mean_group_std": 0.04862063620140382, "aux_brier/mean_r": 0.9607684011459631, "aux_brier/n_active_tok": 269.75, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 67.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4508516905737705, "calib/avg_num_step_conf": 8.51171875, "calib/ece": 0.38048, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.0166265368852459, "calib/mean_conf": 0.14952, "calib/mu_c": 0.14140625, "calib/mu_w": 0.1580327868852459, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009000000000000001, "calib/std_conf": 0.07035744168174395, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2750.0, "completions/max_terminated_length": 2750.0, "completions/mean_length": 538.55859375, "completions/mean_terminated_length": 542.7991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.1888, "grad_norm": 0.045605529099702835, "learning_rate": 6.388888888888889e-07, "loss": 0.0226, "num_tokens": 39577166.0, "reward": 1.1320149898529053, "reward_std": 0.2338375747203827, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5905601382255554, "rewards/format_reward_step_strict": 0.96875, "step": 177 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -2.3104294180620322e-07, "aux_brier/mean_group_std": 0.06723367407909693, "aux_brier/mean_r": 0.9492157188413886, "aux_brier/n_active_tok": 267.5, "aux_brier/n_groups": 14.9375, "aux_brier/n_step_records": 66.875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5969959946595461, "calib/avg_num_step_conf": 9.0078125, "calib/ece": 0.4315384615384616, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009666889185580785, "calib/mean_conf": 0.14174089068825912, "calib/mu_c": 0.14592857142857144, "calib/mu_w": 0.13626168224299065, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0032388663967611335, "calib/std_conf": 0.0500182726531682, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 497.9921875, "completions/mean_terminated_length": 507.912353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.18986666666666666, "grad_norm": 0.06169963628053665, "learning_rate": 6.111111111111112e-07, "loss": 0.0261, "num_tokens": 39810724.0, "reward": 1.1655563116073608, "reward_std": 0.2794933617115021, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5528503656387329, "rewards/format_reward_step_strict": 0.9609375, "step": 178 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.433350124147026e-07, "aux_brier/mean_group_std": 0.05176466906621195, "aux_brier/mean_r": 0.9615216922126134, "aux_brier/n_active_tok": 264.625, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 66.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5617105263157894, "calib/avg_num_step_conf": 8.41796875, "calib/ece": 0.46158730158730155, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00498157894736842, "calib/mean_conf": 0.1469047619047619, "calib/mu_c": 0.14888157894736842, "calib/mu_w": 0.1439, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.002658730158730158, "calib/std_conf": 0.04943272301126441, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1943.0, "completions/max_terminated_length": 1943.0, "completions/mean_length": 500.34765625, "completions/mean_terminated_length": 502.3098449707031, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.19093333333333334, "grad_norm": 0.04521411284804344, "learning_rate": 5.833333333333334e-07, "loss": -0.0184, "num_tokens": 40045077.0, "reward": 1.2150800228118896, "reward_std": 0.2715832591056824, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.540007472038269, "rewards/format_reward_step_strict": 0.97265625, "step": 179 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.557433655350996e-07, "aux_brier/mean_group_std": 0.07278899981776597, "aux_brier/mean_r": 0.9373554568692914, "aux_brier/n_active_tok": 300.25, "aux_brier/n_groups": 17.125, "aux_brier/n_step_records": 75.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5727829543233206, "calib/avg_num_step_conf": 9.61328125, "calib/ece": 0.43198412698412697, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011193067048934607, "calib/mean_conf": 0.16166666666666665, "calib/mu_c": 0.166241610738255, "calib/mu_w": 0.1550485436893204, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0011904761904761904, "calib/std_conf": 0.04701823354523526, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2816.0, "completions/max_terminated_length": 2816.0, "completions/mean_length": 596.3359375, "completions/mean_terminated_length": 598.674560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.192, "grad_norm": 0.01824566349387169, "learning_rate": 5.555555555555555e-07, "loss": -0.0015, "num_tokens": 40301595.0, "reward": 1.204345941543579, "reward_std": 0.22382229566574097, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5517585873603821, "rewards/format_reward_step_strict": 0.9609375, "step": 180 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -3.7227140847750917e-07, "aux_brier/mean_group_std": 0.03710208709554556, "aux_brier/mean_r": 0.9678823743283032, "aux_brier/n_active_tok": 273.125, "aux_brier/n_groups": 17.25, "aux_brier/n_step_records": 68.28125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5787562125653116, "calib/avg_num_step_conf": 8.73828125, "calib/ece": 0.3343426294820717, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012076589779533603, "calib/mean_conf": 0.14334661354581674, "calib/mu_c": 0.14974576271186443, "calib/mu_w": 0.13766917293233083, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003784860557768924, "calib/std_conf": 0.056680724768170314, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2814.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 496.55078125, "completions/mean_terminated_length": 500.46063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.19306666666666666, "grad_norm": 0.08360396325588226, "learning_rate": 5.277777777777779e-07, "loss": 0.0483, "num_tokens": 40534976.0, "reward": 1.104553461074829, "reward_std": 0.31083574891090393, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6291515827178955, "rewards/format_reward_step_strict": 0.97265625, "step": 181 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.922425588816613e-07, "aux_brier/mean_group_std": 0.06789074830965955, "aux_brier/mean_r": 0.954944220972813, "aux_brier/n_active_tok": 277.875, "aux_brier/n_groups": 16.5625, "aux_brier/n_step_records": 69.46875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5816562721018453, "calib/avg_num_step_conf": 9.1640625, "calib/ece": 0.4632283464566928, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035594419083135043, "calib/mean_conf": 0.144251968503937, "calib/mu_c": 0.1456953642384106, "calib/mu_w": 0.1421359223300971, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006496062992125986, "calib/std_conf": 0.0589076373674881, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1588.0, "completions/max_terminated_length": 1588.0, "completions/mean_length": 532.20703125, "completions/mean_terminated_length": 534.2941284179688, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.19413333333333332, "grad_norm": 0.04704170301556587, "learning_rate": 5.000000000000001e-07, "loss": -0.0073, "num_tokens": 40777381.0, "reward": 1.220548152923584, "reward_std": 0.2288506031036377, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5462551116943359, "rewards/format_reward_step_strict": 0.98828125, "step": 182 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 5.345288603963994e-07, "aux_brier/mean_group_std": 0.05264424985501321, "aux_brier/mean_r": 0.9604438890020913, "aux_brier/n_active_tok": 277.25, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 69.3125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5346075692151384, "calib/avg_num_step_conf": 8.87890625, "calib/ece": 0.35828685258964144, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00028956057912110644, "calib/mean_conf": 0.14211155378486057, "calib/mu_c": 0.14225806451612905, "calib/mu_w": 0.14196850393700794, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0031872509960159364, "calib/std_conf": 0.050652352981139896, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2832.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 558.50390625, "completions/mean_terminated_length": 560.6941528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.1952, "grad_norm": 0.02439642697572708, "learning_rate": 4.7222222222222226e-07, "loss": 0.0363, "num_tokens": 41027038.0, "reward": 1.1275067329406738, "reward_std": 0.28274720907211304, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6115894317626953, "rewards/format_reward_step_strict": 0.98046875, "step": 183 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 3.103694012906999e-07, "aux_brier/mean_group_std": 0.06436204785975919, "aux_brier/mean_r": 0.9484624329261578, "aux_brier/n_active_tok": 295.25, "aux_brier/n_groups": 19.1875, "aux_brier/n_step_records": 73.8125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4677537681739362, "calib/avg_num_step_conf": 9.53515625, "calib/ece": 0.45848605577689244, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006673336001067065, "calib/mean_conf": 0.15776892430278885, "calib/mu_c": 0.15516339869281048, "calib/mu_w": 0.16183673469387755, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0033466135458167334, "calib/std_conf": 0.05640457857745456, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 547.421875, "completions/mean_terminated_length": 551.7322998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.19626666666666667, "grad_norm": 0.03321665897965431, "learning_rate": 4.444444444444445e-07, "loss": 0.02, "num_tokens": 41272458.0, "reward": 1.2182791233062744, "reward_std": 0.31848832964897156, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.5371793508529663, "rewards/format_reward_step_strict": 0.97265625, "step": 184 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.4893604744224973e-07, "aux_brier/mean_group_std": 0.06732063692971484, "aux_brier/mean_r": 0.9515054704357953, "aux_brier/n_active_tok": 312.0, "aux_brier/n_groups": 21.53125, "aux_brier/n_step_records": 78.0, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5551517598761694, "calib/avg_num_step_conf": 10.41796875, "calib/ece": 0.36415573770491805, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008297866612827276, "calib/mean_conf": 0.1563360655737705, "calib/mu_c": 0.1603149606299213, "calib/mu_w": 0.152017094017094, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0536060478995191, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2945.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 563.51953125, "completions/mean_terminated_length": 574.7450561523438, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.19733333333333333, "grad_norm": 0.08410869538784027, "learning_rate": 4.1666666666666667e-07, "loss": 0.0669, "num_tokens": 41523639.0, "reward": 1.1133431196212769, "reward_std": 0.2527396082878113, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5861849784851074, "rewards/format_reward_step_strict": 0.94140625, "step": 185 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.1460473081648104e-07, "aux_brier/mean_group_std": 0.052871889902070625, "aux_brier/mean_r": 0.9604263236935935, "aux_brier/n_active_tok": 290.875, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 72.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6115372220431841, "calib/avg_num_step_conf": 9.45703125, "calib/ece": 0.4247222222222222, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01968868836609733, "calib/mean_conf": 0.15067460317460318, "calib/mu_c": 0.1590344827586207, "calib/mu_w": 0.13934579439252337, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05303432990669396, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 542.7109375, "completions/mean_terminated_length": 546.9842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.1984, "grad_norm": 0.03420405834913254, "learning_rate": 3.8888888888888895e-07, "loss": 0.0136, "num_tokens": 41767613.0, "reward": 1.196563959121704, "reward_std": 0.2683338522911072, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5675058364868164, "rewards/format_reward_step_strict": 0.9765625, "step": 186 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.9278789737130495e-07, "aux_brier/mean_group_std": 0.05675664947652703, "aux_brier/mean_r": 0.9555893828683435, "aux_brier/n_active_tok": 323.625, "aux_brier/n_groups": 24.78125, "aux_brier/n_step_records": 80.90625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5007753505933118, "calib/avg_num_step_conf": 10.59375, "calib/ece": 0.4300404858299595, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0038592233009708543, "calib/mean_conf": 0.1591093117408907, "calib/mu_c": 0.1575, "calib/mu_w": 0.16135922330097086, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003076923076923075, "calib/std_conf": 0.0782915771133442, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2369.0, "completions/max_terminated_length": 2369.0, "completions/mean_length": 568.6328125, "completions/mean_terminated_length": 577.6587524414062, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.19946666666666665, "grad_norm": 0.08125673234462738, "learning_rate": 3.611111111111111e-07, "loss": -0.0061, "num_tokens": 42014727.0, "reward": 1.179487705230713, "reward_std": 0.32495832443237305, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5460761785507202, "rewards/format_reward_step_strict": 0.9609375, "step": 187 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0506600884307993e-06, "aux_brier/mean_group_std": 0.06685216259607211, "aux_brier/mean_r": 0.948845332300965, "aux_brier/n_active_tok": 284.875, "aux_brier/n_groups": 16.03125, "aux_brier/n_step_records": 71.21875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5872108843537416, "calib/avg_num_step_conf": 9.54296875, "calib/ece": 0.4564516129032259, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013360544217687065, "calib/mean_conf": 0.14838709677419357, "calib/mu_c": 0.15366666666666665, "calib/mu_w": 0.14030612244897958, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0503197373059421, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 535.859375, "completions/mean_terminated_length": 546.5338745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.20053333333333334, "grad_norm": 0.11312317103147507, "learning_rate": 3.3333333333333335e-07, "loss": -0.0073, "num_tokens": 42255979.0, "reward": 1.2021760940551758, "reward_std": 0.27750808000564575, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5352667570114136, "rewards/format_reward_step_strict": 0.96484375, "step": 188 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -7.452008510544506e-07, "aux_brier/mean_group_std": 0.06444877414433915, "aux_brier/mean_r": 0.9399538708497756, "aux_brier/n_active_tok": 261.875, "aux_brier/n_groups": 14.21875, "aux_brier/n_step_records": 65.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6512432795698925, "calib/avg_num_step_conf": 8.33984375, "calib/ece": 0.4774501992031872, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019141801075268833, "calib/mean_conf": 0.14254980079681276, "calib/mu_c": 0.14987096774193548, "calib/mu_w": 0.13072916666666665, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012350597609561752, "calib/std_conf": 0.042910456544259845, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1524.0, "completions/max_terminated_length": 1524.0, "completions/mean_length": 498.23046875, "completions/mean_terminated_length": 500.1843566894531, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.2016, "grad_norm": 0.03259199112653732, "learning_rate": 3.055555555555556e-07, "loss": -0.0372, "num_tokens": 42491294.0, "reward": 1.2293919324874878, "reward_std": 0.23676735162734985, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5347554683685303, "rewards/format_reward_step_strict": 0.98046875, "step": 189 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.5741437181526763e-08, "aux_brier/mean_group_std": 0.07738672575917957, "aux_brier/mean_r": 0.9435339355484508, "aux_brier/n_active_tok": 299.0, "aux_brier/n_groups": 18.1875, "aux_brier/n_step_records": 74.75, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5185710515899196, "calib/avg_num_step_conf": 9.6875, "calib/ece": 0.4170682730923695, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010780445969125252, "calib/mean_conf": 0.15722891566265063, "calib/mu_c": 0.16181818181818183, "calib/mu_w": 0.15103773584905658, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07050557390045874, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 585.86328125, "completions/mean_terminated_length": 590.4763793945312, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.20266666666666666, "grad_norm": 0.02406657487154007, "learning_rate": 2.7777777777777776e-07, "loss": 0.0149, "num_tokens": 42746883.0, "reward": 1.1835261583328247, "reward_std": 0.2722218930721283, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5622296929359436, "rewards/format_reward_step_strict": 0.9609375, "step": 190 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.2340974625834065e-07, "aux_brier/mean_group_std": 0.037090143465623206, "aux_brier/mean_r": 0.96761951313658, "aux_brier/n_active_tok": 301.875, "aux_brier/n_groups": 15.8125, "aux_brier/n_step_records": 75.46875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6338861788617887, "calib/avg_num_step_conf": 9.60546875, "calib/ece": 0.3501612903225806, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01989853658536586, "calib/mean_conf": 0.15637096774193548, "calib/mu_c": 0.16624, "calib/mu_w": 0.14634146341463414, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00125, "calib/std_conf": 0.04924158152086891, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2871.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 555.515625, "completions/mean_terminated_length": 559.8897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.20373333333333332, "grad_norm": 0.03271899372339249, "learning_rate": 2.5000000000000004e-07, "loss": 0.0392, "num_tokens": 42993263.0, "reward": 1.1268501281738281, "reward_std": 0.2318863868713379, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6167757511138916, "rewards/format_reward_step_strict": 0.96875, "step": 191 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -1.861136611491787e-07, "aux_brier/mean_group_std": 0.055214087686892996, "aux_brier/mean_r": 0.9545805860948601, "aux_brier/n_active_tok": 285.125, "aux_brier/n_groups": 16.9375, "aux_brier/n_step_records": 71.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6114485075591161, "calib/avg_num_step_conf": 9.3046875, "calib/ece": 0.4221513944223108, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016469182064866222, "calib/mean_conf": 0.14390438247011955, "calib/mu_c": 0.151056338028169, "calib/mu_w": 0.13458715596330278, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00015936254980079717, "calib/std_conf": 0.0577387199510932, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2050.0, "completions/max_terminated_length": 2050.0, "completions/mean_length": 556.12109375, "completions/mean_terminated_length": 560.5, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.2048, "grad_norm": 0.05804295092821121, "learning_rate": 2.2222222222222224e-07, "loss": 0.0509, "num_tokens": 43240606.0, "reward": 1.181755781173706, "reward_std": 0.28981447219848633, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5629605054855347, "rewards/format_reward_step_strict": 0.97265625, "step": 192 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -6.803905278074396e-07, "aux_brier/mean_group_std": 0.0431717765249665, "aux_brier/mean_r": 0.9606889633754716, "aux_brier/n_active_tok": 309.75, "aux_brier/n_groups": 20.21875, "aux_brier/n_step_records": 77.4375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.43971282454760036, "calib/avg_num_step_conf": 10.140625, "calib/ece": 0.36020242914979755, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.017089562024652505, "calib/mean_conf": 0.15947368421052632, "calib/mu_c": 0.15089430894308944, "calib/mu_w": 0.16798387096774195, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.010850202429149799, "calib/std_conf": 0.06391516324710692, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2719.0, "completions/max_terminated_length": 2719.0, "completions/mean_length": 590.11328125, "completions/mean_terminated_length": 597.1107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.20586666666666667, "grad_norm": 0.03458983823657036, "learning_rate": 1.9444444444444447e-07, "loss": 0.0748, "num_tokens": 43497387.0, "reward": 1.1143351793289185, "reward_std": 0.34169265627861023, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5979660749435425, "rewards/format_reward_step_strict": 0.9609375, "step": 193 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 1.0541671985331291e-06, "aux_brier/mean_group_std": 0.08417545681814177, "aux_brier/mean_r": 0.9387734846250257, "aux_brier/n_active_tok": 257.0, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 64.25, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6547619047619048, "calib/avg_num_step_conf": 8.24609375, "calib/ece": 0.4482868525896414, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.028838958660387257, "calib/mean_conf": 0.13737051792828686, "calib/mu_c": 0.1493197278911565, "calib/mu_w": 0.12048076923076924, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05068709344166836, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1582.0, "completions/max_terminated_length": 1582.0, "completions/mean_length": 493.59375, "completions/mean_terminated_length": 497.4803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.20693333333333333, "grad_norm": 0.0344914086163044, "learning_rate": 1.6666666666666668e-07, "loss": -0.0077, "num_tokens": 43729691.0, "reward": 1.2007018327713013, "reward_std": 0.27053576707839966, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5528074502944946, "rewards/format_reward_step_strict": 0.9765625, "step": 194 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.2498041210572417e-07, "aux_brier/mean_group_std": 0.068059353922213, "aux_brier/mean_r": 0.9491068867397771, "aux_brier/n_active_tok": 294.75, "aux_brier/n_groups": 18.625, "aux_brier/n_step_records": 73.6875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5792780215396889, "calib/avg_num_step_conf": 9.890625, "calib/ece": 0.4136842105263157, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009962106102911844, "calib/mean_conf": 0.1471255060728745, "calib/mu_c": 0.1515217391304348, "calib/mu_w": 0.14155963302752295, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010526315789473684, "calib/std_conf": 0.052602670354152316, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 544.3046875, "completions/mean_terminated_length": 557.3680419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.208, "grad_norm": 0.145261749625206, "learning_rate": 1.3888888888888888e-07, "loss": -0.0407, "num_tokens": 43975017.0, "reward": 1.1599828004837036, "reward_std": 0.259069561958313, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5618062615394592, "rewards/format_reward_step_strict": 0.9609375, "step": 195 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.4591270117169106e-06, "aux_brier/mean_group_std": 0.061708016616617144, "aux_brier/mean_r": 0.953872105484891, "aux_brier/n_active_tok": 255.75, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 63.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.534438612213545, "calib/avg_num_step_conf": 8.17578125, "calib/ece": 0.42758893280632415, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005587632825502509, "calib/mean_conf": 0.14948616600790515, "calib/mu_c": 0.15184931506849317, "calib/mu_w": 0.14626168224299066, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04645679711671892, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1719.0, "completions/max_terminated_length": 1719.0, "completions/mean_length": 459.8203125, "completions/mean_terminated_length": 461.6235656738281, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.20906666666666668, "grad_norm": 0.018504036590456963, "learning_rate": 1.1111111111111112e-07, "loss": -0.0032, "num_tokens": 44195275.0, "reward": 1.2061917781829834, "reward_std": 0.2512151896953583, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5669547319412231, "rewards/format_reward_step_strict": 0.98828125, "step": 196 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -4.589672547850032e-07, "aux_brier/mean_group_std": 0.09339156506784498, "aux_brier/mean_r": 0.9286565395622205, "aux_brier/n_active_tok": 307.25, "aux_brier/n_groups": 21.875, "aux_brier/n_step_records": 76.8125, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5016392322928762, "calib/avg_num_step_conf": 10.1640625, "calib/ece": 0.34995867768595046, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005371900826446313, "calib/mean_conf": 0.15260330578512393, "calib/mu_c": 0.15528925619834713, "calib/mu_w": 0.14991735537190082, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012809917355371902, "calib/std_conf": 0.06047538661113296, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 572.05078125, "completions/mean_terminated_length": 583.4462280273438, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.21013333333333334, "grad_norm": 0.0341520756483078, "learning_rate": 8.333333333333334e-08, "loss": 0.0792, "num_tokens": 44446776.0, "reward": 1.0938079357147217, "reward_std": 0.2998431921005249, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5939816236495972, "rewards/format_reward_step_strict": 0.9453125, "step": 197 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": -5.442882395068072e-07, "aux_brier/mean_group_std": 0.0549987736354695, "aux_brier/mean_r": 0.9597876312209791, "aux_brier/n_active_tok": 291.625, "aux_brier/n_groups": 17.75, "aux_brier/n_step_records": 72.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6205846936044956, "calib/avg_num_step_conf": 9.453125, "calib/ece": 0.44405622489959834, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.02602689322986354, "calib/mean_conf": 0.15032128514056228, "calib/mu_c": 0.1608783783783784, "calib/mu_w": 0.13485148514851486, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07592466931371257, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2248.0, "completions/max_terminated_length": 2248.0, "completions/mean_length": 511.1484375, "completions/mean_terminated_length": 519.261962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.2112, "grad_norm": 0.059009380638599396, "learning_rate": 5.555555555555556e-08, "loss": -0.0015, "num_tokens": 44683014.0, "reward": 1.1997989416122437, "reward_std": 0.24536868929862976, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5491960644721985, "rewards/format_reward_step_strict": 0.96875, "step": 198 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 2.0662572798244017e-07, "aux_brier/mean_group_std": 0.0600427922814054, "aux_brier/mean_r": 0.9515209018543054, "aux_brier/n_active_tok": 301.0, "aux_brier/n_groups": 16.8125, "aux_brier/n_step_records": 75.25, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5038126361655774, "calib/avg_num_step_conf": 9.54296875, "calib/ece": 0.4215040650406504, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012745098039215752, "calib/mean_conf": 0.1638617886178862, "calib/mu_c": 0.16333333333333333, "calib/mu_w": 0.1646078431372549, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05597732194746908, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2327.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 585.2265625, "completions/mean_terminated_length": 592.166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.21226666666666666, "grad_norm": 0.04053623229265213, "learning_rate": 2.777777777777778e-08, "loss": 0.0188, "num_tokens": 44937032.0, "reward": 1.1813123226165771, "reward_std": 0.3064155578613281, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5533746480941772, "rewards/format_reward_step_strict": 0.9609375, "step": 199 }, { "aux_brier/lambda": 0.10000000000000005, "aux_brier/loss": 7.429067104963716e-07, "aux_brier/mean_group_std": 0.05776427918373577, "aux_brier/mean_r": 0.9539067025547618, "aux_brier/n_active_tok": 261.375, "aux_brier/n_groups": 15.53125, "aux_brier/n_step_records": 65.34375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6913060897435896, "calib/avg_num_step_conf": 8.19921875, "calib/ece": 0.43596774193548393, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026597222222222217, "calib/mean_conf": 0.15169354838709678, "calib/mu_c": 0.16284722222222223, "calib/mu_w": 0.13625, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0035080645161290323, "calib/std_conf": 0.06901638157798934, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2382.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 528.5390625, "completions/mean_terminated_length": 532.7008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.21333333333333335, "grad_norm": 0.04022820293903351, "learning_rate": 0.0, "loss": -0.0159, "num_tokens": 45180386.0, "reward": 1.187511682510376, "reward_std": 0.22799977660179138, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5625468492507935, "rewards/format_reward_step_strict": 0.96875, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.035836528669169636, "train_runtime": 16174.3808, "train_samples_per_second": 3.165, "train_steps_per_second": 0.012 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 45180386, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }