{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.4755090430338697, "calibration/batch_distribution_entropy": 0.26199859861521857, "calibration/batch_entropy_100bins": 0.3438930495423692, "calibration/batch_entropy_10bins": 0.26199859861521857, "calibration/batch_entropy_50bins": 0.3997214906203269, "calibration/batch_uniqueness": 0.4832166822381069, "calibration/confidence_entropy": 0.20919231184298712, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.26199859861521857, "calibration/distribution_entropy_100": 0.3438930495423692, "calibration/ece": 0.4549087624937119, "calibration/mean_confidence": 0.9209537398939647, "calibration/unique_confidence_per_question": 0.03177083333333333, "calibration/unique_confidences": 12.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019357638888888907, "completions/max_length": 3991.8, "completions/max_terminated_length": 3991.8, "completions/mean_length": 515.6087646484375, "completions/mean_terminated_length": 525.7942260742187, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.004724407568573952, "learning_rate": 5.952380952380953e-07, "loss": 0.0065, "num_tokens": 9054021.0, "reward": 0.5780223369598388, "reward_std": 0.5210743069648742, "rewards/accuracy_reward": 0.26449652314186095, "rewards/brier_reward": 0.3152239501476288, "rewards/confidence_uniqueness_reward": 0.2885810971260071, "rewards/format_reward": 0.6014756917953491, "rewards/frontier_aurc_reward": 0.27824242115020753, "rewards/frontier_coverage_0": 0.27824242115020753, "rewards/frontier_coverage_1": 0.27824242115020753, "rewards/frontier_coverage_10": 0.27824242115020753, "rewards/frontier_coverage_15": 0.27824242115020753, "rewards/frontier_coverage_20": 0.27824242115020753, "rewards/frontier_coverage_25": 0.27824242115020753, "rewards/frontier_coverage_5": 0.27824242115020753, "rewards/true_frontier_ece_gap_only_reward": 0.27824242115020753, "signal/accuracy_reward/centered_abs_mean": 0.31138780117034914, "signal/accuracy_reward/group_std_mean": 0.37181236147880553, "signal/accuracy_reward/group_zero_std_frac": 0.08055555745959282, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15569390058517457, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15569390058517457, "signal/advantage_abs_mean": 0.4485911726951599, "signal/advantage_pre_scale_abs_mean": 0.4485911726951599, "signal/advantage_pre_scale_std": 0.5264933466911316, "signal/advantage_std": 0.5264933466911316, "signal/brier_reward/centered_abs_mean": 0.3207183539867401, "signal/brier_reward/group_std_mean": 0.37424429655075075, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04008979424834251, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04008979424834251, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23610488772392274, "signal/confidence_uniqueness_reward/group_std_mean": 0.2880967080593109, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029513110965490343, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029513110965490343, "signal/format_reward/centered_abs_mean": 0.43846028447151186, "signal/format_reward/group_std_mean": 0.4738844096660614, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21923014223575593, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21923014223575593, "signal/frontier_aurc_reward/centered_abs_mean": 0.3114172875881195, "signal/frontier_aurc_reward/group_std_mean": 0.36980949640274047, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_0/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_0/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_1/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_1/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_10/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_10/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_15/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_15/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_20/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_20/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_25/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_25/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_5/centered_abs_mean": 0.3114172875881195, "signal/frontier_coverage_5/group_std_mean": 0.36980949640274047, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004865895118564367, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004865895118564367, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.3114172875881195, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.36980949640274047, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.038927160948514936, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.038927160948514936, "step": 5 }, { "calibration/aurc": 0.5159006411390681, "calibration/batch_distribution_entropy": 0.23845090979417666, "calibration/batch_entropy_100bins": 0.33576880006525267, "calibration/batch_entropy_10bins": 0.23845090979417666, "calibration/batch_entropy_50bins": 0.3873401847509245, "calibration/batch_uniqueness": 0.4823728144800886, "calibration/confidence_entropy": 0.21192807755010623, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.23845090979417666, "calibration/distribution_entropy_100": 0.33576880006525267, "calibration/ece": 0.48419175646218493, "calibration/mean_confidence": 0.9245204458265471, "calibration/unique_confidence_per_question": 0.03072916666666666, "calibration/unique_confidences": 11.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018750000000000024, "completions/max_length": 4070.8, "completions/max_terminated_length": 4070.8, "completions/mean_length": 476.8085998535156, "completions/mean_terminated_length": 486.0776733398437, "completions/min_length": 0.0, "completions/min_terminated_length": 19.8, "epoch": 0.023999700003749954, "grad_norm": 0.06719768047332764, "learning_rate": 1.1904761904761906e-06, "loss": 0.0027, "num_tokens": 17629576.0, "reward": 0.6722566485404968, "reward_std": 0.48708855509758, "rewards/accuracy_reward": 0.29557291269302366, "rewards/brier_reward": 0.35851759910583497, "rewards/confidence_uniqueness_reward": 0.3507663607597351, "rewards/format_reward": 0.7157118082046509, "rewards/frontier_aurc_reward": 0.3118152379989624, "rewards/frontier_coverage_0": 0.3118152379989624, "rewards/frontier_coverage_1": 0.3118152379989624, "rewards/frontier_coverage_10": 0.3118152379989624, "rewards/frontier_coverage_15": 0.3118152379989624, "rewards/frontier_coverage_20": 0.3118152379989624, "rewards/frontier_coverage_25": 0.3118152379989624, "rewards/frontier_coverage_5": 0.3118152379989624, "rewards/true_frontier_ece_gap_only_reward": 0.3118152379989624, "signal/accuracy_reward/centered_abs_mean": 0.3207736611366272, "signal/accuracy_reward/group_std_mean": 0.37864009737968446, "signal/accuracy_reward/group_zero_std_frac": 0.08333333507180214, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1603868305683136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1603868305683136, "signal/advantage_abs_mean": 0.4065045177936554, "signal/advantage_pre_scale_abs_mean": 0.4065045177936554, "signal/advantage_pre_scale_std": 0.49197044372558596, "signal/advantage_std": 0.49197044372558596, "signal/brier_reward/centered_abs_mean": 0.31853480339050294, "signal/brier_reward/group_std_mean": 0.37187020778656005, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03981685042381287, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03981685042381287, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21813510358333588, "signal/confidence_uniqueness_reward/group_std_mean": 0.2754356682300568, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027266887947916985, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027266887947916985, "signal/format_reward/centered_abs_mean": 0.353564453125, "signal/format_reward/group_std_mean": 0.41884335279464724, "signal/format_reward/group_zero_std_frac": 0.00555555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1767822265625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1767822265625, "signal/frontier_aurc_reward/centered_abs_mean": 0.3167228579521179, "signal/frontier_aurc_reward/group_std_mean": 0.37375251650810243, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_0/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_0/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_1/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_1/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_10/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_10/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_15/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_15/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_20/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_20/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_25/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_25/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_5/centered_abs_mean": 0.3167228579521179, "signal/frontier_coverage_5/group_std_mean": 0.37375251650810243, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004948794655501842, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004948794655501842, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.3167228579521179, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.37375251650810243, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.03959035724401474, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.03959035724401474, "step": 10 }, { "calibration/aurc": 0.5224229909940956, "calibration/batch_distribution_entropy": 0.2877403602460552, "calibration/batch_entropy_100bins": 0.3573186157704617, "calibration/batch_entropy_10bins": 0.2877403602460552, "calibration/batch_entropy_50bins": 0.41523268744576436, "calibration/batch_uniqueness": 0.5167436160191917, "calibration/confidence_entropy": 0.23103006074957716, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.2877403602460552, "calibration/distribution_entropy_100": 0.3573186157704617, "calibration/ece": 0.4970179613664881, "calibration/mean_confidence": 0.9129537058033463, "calibration/unique_confidence_per_question": 0.036979166666666674, "calibration/unique_confidences": 14.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010850694444444442, "completions/max_length": 3950.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 433.64210815429686, "completions/mean_terminated_length": 438.44232177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.03599955000562493, "grad_norm": 0.0016660373657941818, "learning_rate": 1.7857142857142859e-06, "loss": -0.0113, "num_tokens": 25727117.0, "reward": 0.8379699349403381, "reward_std": 0.3784303069114685, "rewards/accuracy_reward": 0.32907986640930176, "rewards/brier_reward": 0.431581848859787, "rewards/confidence_uniqueness_reward": 0.5059985220432281, "rewards/format_reward": 0.93359375, "rewards/frontier_aurc_reward": 0.3577423691749573, "rewards/frontier_coverage_0": 0.3577423691749573, "rewards/frontier_coverage_1": 0.3577423691749573, "rewards/frontier_coverage_10": 0.3577423691749573, "rewards/frontier_coverage_15": 0.3577423691749573, "rewards/frontier_coverage_20": 0.3577423691749573, "rewards/frontier_coverage_25": 0.3577423691749573, "rewards/frontier_coverage_5": 0.3577423691749573, "rewards/true_frontier_ece_gap_only_reward": 0.3577423691749573, "signal/accuracy_reward/centered_abs_mean": 0.3203721702098846, "signal/accuracy_reward/group_std_mean": 0.37649917006492617, "signal/accuracy_reward/group_zero_std_frac": 0.09722222462296486, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1601860851049423, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1601860851049423, "signal/advantage_abs_mean": 0.30626789927482606, "signal/advantage_pre_scale_abs_mean": 0.30626789927482606, "signal/advantage_pre_scale_std": 0.388842511177063, "signal/advantage_std": 0.388842511177063, "signal/brier_reward/centered_abs_mean": 0.30102636218070983, "signal/brier_reward/group_std_mean": 0.3518189787864685, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03762829527258873, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03762829527258873, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17830342054367065, "signal/confidence_uniqueness_reward/group_std_mean": 0.2288795828819275, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022287927567958832, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022287927567958832, "signal/format_reward/centered_abs_mean": 0.1138617604970932, "signal/format_reward/group_std_mean": 0.19477857500314713, "signal/format_reward/group_zero_std_frac": 0.2944444492459297, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0569308802485466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0569308802485466, "signal/frontier_aurc_reward/centered_abs_mean": 0.31336275935173036, "signal/frontier_aurc_reward/group_std_mean": 0.3662181556224823, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_0/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_0/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_1/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_1/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_10/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_10/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_15/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_15/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_20/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_20/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_25/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_25/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_5/centered_abs_mean": 0.31336275935173036, "signal/frontier_coverage_5/group_std_mean": 0.3662181556224823, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004896293114870787, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004896293114870787, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.31336275935173036, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.3662181556224823, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.039170344918966295, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.039170344918966295, "step": 15 }, { "calibration/aurc": 0.44171379177515535, "calibration/batch_distribution_entropy": 0.3765147360973443, "calibration/batch_entropy_100bins": 0.39381071707061527, "calibration/batch_entropy_10bins": 0.3765147360973443, "calibration/batch_entropy_50bins": 0.4577038412907034, "calibration/batch_uniqueness": 0.5961709299135531, "calibration/buffer_distribution_entropy": 0.29230688761468687, "calibration/buffer_entropy_100bins": 0.36493243936626785, "calibration/buffer_entropy_10bins": 0.29230688761468687, "calibration/buffer_entropy_50bins": 0.42293687132466956, "calibration/confidence_entropy": 0.2898413619535891, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.034031413612565446, "calibration/coverage@30%": 0.07905759162303665, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.3765147360973443, "calibration/distribution_entropy_100": 0.39381071707061527, "calibration/ece": 0.3790869053038728, "calibration/mean_confidence": 0.8925066797565309, "calibration/unique_confidence_per_question": 0.035416666666666666, "calibration/unique_confidences": 13.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010763888888888884, "completions/max_length": 3739.4, "completions/max_terminated_length": 3739.4, "completions/mean_length": 471.2155456542969, "completions/mean_terminated_length": 476.41375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 91.8, "epoch": 0.04799940000749991, "grad_norm": 0.0008532739011570811, "learning_rate": 2.380952380952381e-06, "loss": -0.0093, "num_tokens": 34269216.0, "reward": 0.8856567025184632, "reward_std": 0.2739575058221817, "rewards/accuracy_reward": 0.4450520873069763, "rewards/brier_reward": 0.5553683876991272, "rewards/confidence_uniqueness_reward": 0.5914790272712708, "rewards/format_reward": 0.9831597328186035, "rewards/frontier_aurc_reward": 0.1789298068732023, "rewards/frontier_coverage_0": 0.18989355927333235, "rewards/frontier_coverage_1": 0.18989355927333235, "rewards/frontier_coverage_10": 0.18989355927333235, "rewards/frontier_coverage_15": 0.18989355927333235, "rewards/frontier_coverage_20": 0.18989355927333235, "rewards/frontier_coverage_25": 0.18989355927333235, "rewards/frontier_coverage_5": 0.18989355927333235, "rewards/true_frontier_ece_gap_only_reward": 0.03703599572181702, "signal/accuracy_reward/centered_abs_mean": 0.2997667074203491, "signal/accuracy_reward/group_std_mean": 0.36736690402030947, "signal/accuracy_reward/group_zero_std_frac": 0.06944444626569748, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14988335371017455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14988335371017455, "signal/advantage_abs_mean": 0.21930149793624878, "signal/advantage_pre_scale_abs_mean": 0.21930149793624878, "signal/advantage_pre_scale_std": 0.28236431181430816, "signal/advantage_std": 0.28236431181430816, "signal/brier_reward/centered_abs_mean": 0.2637813687324524, "signal/brier_reward/group_std_mean": 0.3207400619983673, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03297267109155655, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03297267109155655, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1622892886400223, "signal/confidence_uniqueness_reward/group_std_mean": 0.19781720638275146, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020286161080002786, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020286161080002786, "signal/format_reward/centered_abs_mean": 0.03038194477558136, "signal/format_reward/group_std_mean": 0.06310995742678642, "signal/format_reward/group_zero_std_frac": 0.7222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01519097238779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01519097238779068, "signal/frontier_aurc_reward/centered_abs_mean": 0.11971323965117335, "signal/frontier_aurc_reward/group_std_mean": 0.14516795333474874, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0018705193695495836, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0018705193695495836, "signal/frontier_coverage_0/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_0/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_1/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_1/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_10/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_10/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_15/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_15/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_20/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_20/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_25/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_25/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_5/centered_abs_mean": 0.13567787148058413, "signal/frontier_coverage_5/group_std_mean": 0.1734710790216923, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002119966741884127, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002119966741884127, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.1349403366446495, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.16668230146169663, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.016867542080581187, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.016867542080581187, "step": 20 }, { "calibration/aurc": 0.3429455132381247, "calibration/batch_distribution_entropy": 0.507851007126862, "calibration/batch_entropy_100bins": 0.44183174453138746, "calibration/batch_entropy_10bins": 0.507851007126862, "calibration/batch_entropy_50bins": 0.514838438261066, "calibration/batch_uniqueness": 0.6844458142688816, "calibration/buffer_distribution_entropy": 0.3345262025303308, "calibration/buffer_entropy_100bins": 0.3853994626063969, "calibration/buffer_entropy_10bins": 0.3345262025303308, "calibration/buffer_entropy_50bins": 0.44640286826484654, "calibration/confidence_entropy": 0.34430819143240965, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.05654450261780105, "calibration/coverage@20%": 0.09424083769633508, "calibration/coverage@25%": 0.22486700447763291, "calibration/coverage@30%": 0.43646112600536197, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.507851007126862, "calibration/distribution_entropy_100": 0.44183174453138746, "calibration/ece": 0.26461993450442634, "calibration/mean_confidence": 0.8628734329706985, "calibration/unique_confidence_per_question": 0.0421875, "calibration/unique_confidences": 16.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333348, "completions/max_length": 4003.6, "completions/max_terminated_length": 4003.6, "completions/mean_length": 524.5051208496094, "completions/mean_terminated_length": 529.7198486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.6, "epoch": 0.05999925000937488, "grad_norm": 0.001525247236713767, "learning_rate": 2.9761904761904763e-06, "loss": -0.0065, "num_tokens": 43435963.0, "reward": 0.9095749855041504, "reward_std": 0.21235645115375518, "rewards/accuracy_reward": 0.5509548485279083, "rewards/brier_reward": 0.6560544490814209, "rewards/confidence_uniqueness_reward": 0.6728395104408265, "rewards/format_reward": 0.9865451335906983, "rewards/frontier_aurc_reward": -0.004245653562247753, "rewards/frontier_coverage_0": 0.003410888835787773, "rewards/frontier_coverage_1": 0.003410888835787773, "rewards/frontier_coverage_10": 0.003410888835787773, "rewards/frontier_coverage_15": 0.003410888835787773, "rewards/frontier_coverage_20": 0.003410888835787773, "rewards/frontier_coverage_25": 0.003410888835787773, "rewards/frontier_coverage_5": 0.003410888835787773, "rewards/true_frontier_ece_gap_only_reward": -0.20474808514118195, "signal/accuracy_reward/centered_abs_mean": 0.26726887822151185, "signal/accuracy_reward/group_std_mean": 0.333760267496109, "signal/accuracy_reward/group_zero_std_frac": 0.12777777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13363443911075593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13363443911075593, "signal/advantage_abs_mean": 0.1648347020149231, "signal/advantage_pre_scale_abs_mean": 0.1648347020149231, "signal/advantage_pre_scale_std": 0.22817236185073853, "signal/advantage_std": 0.22817236185073853, "signal/brier_reward/centered_abs_mean": 0.21432596445083618, "signal/brier_reward/group_std_mean": 0.2680306822061539, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026790745556354523, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.026790745556354523, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12359119206666946, "signal/confidence_uniqueness_reward/group_std_mean": 0.15292936861515044, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015448899008333683, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015448899008333683, "signal/format_reward/centered_abs_mean": 0.024397786147892474, "signal/format_reward/group_std_mean": 0.051703880354762075, "signal/format_reward/group_zero_std_frac": 0.7694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012198893073946237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012198893073946237, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031622422859072684, "signal/frontier_aurc_reward/group_std_mean": 0.004759848862886429, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.941003571730107e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.941003571730107e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_0/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_1/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_1/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_10/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_10/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_15/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_15/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_20/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_20/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_25/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_25/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_5/centered_abs_mean": 0.03383462205529213, "signal/frontier_coverage_5/group_std_mean": 0.0554510623216629, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005286659696139395, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005286659696139395, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.03566240519285202, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04645907133817673, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0044578006491065025, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0044578006491065025, "step": 25 }, { "calibration/aurc": 0.3084429936468077, "calibration/batch_distribution_entropy": 0.6357372712721332, "calibration/batch_entropy_100bins": 0.4579116256178331, "calibration/batch_entropy_10bins": 0.6357372712721332, "calibration/batch_entropy_50bins": 0.5364450128476048, "calibration/batch_uniqueness": 0.7160179009317768, "calibration/buffer_distribution_entropy": 0.4059973934246511, "calibration/buffer_entropy_100bins": 0.4151252075471626, "calibration/buffer_entropy_10bins": 0.4059973934246511, "calibration/buffer_entropy_50bins": 0.4819283775367138, "calibration/confidence_entropy": 0.46384456781483224, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.005759162303664921, "calibration/coverage@15%": 0.04192937506962237, "calibration/coverage@20%": 0.04298485017266347, "calibration/coverage@25%": 0.14497929495647485, "calibration/coverage@30%": 0.48877619011061546, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.6357372712721332, "calibration/distribution_entropy_100": 0.4579116256178331, "calibration/ece": 0.1707801967802031, "calibration/mean_confidence": 0.7917407206813284, "calibration/unique_confidence_per_question": 0.0359375, "calibration/unique_confidences": 13.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015711805555555534, "completions/max_length": 4078.4, "completions/max_terminated_length": 4078.4, "completions/mean_length": 598.9315185546875, "completions/mean_terminated_length": 608.5085327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 125.6, "epoch": 0.07199910001124986, "grad_norm": 0.0004900748026557267, "learning_rate": 3.5714285714285718e-06, "loss": -0.0094, "num_tokens": 53445574.0, "reward": 0.9443552374839783, "reward_std": 0.1934140741825104, "rewards/accuracy_reward": 0.5914930582046509, "rewards/brier_reward": 0.7071029067039489, "rewards/confidence_uniqueness_reward": 0.6907591581344604, "rewards/format_reward": 0.9809895753860474, "rewards/frontier_aurc_reward": -0.0032980738673359157, "rewards/frontier_coverage_0": -0.006392185157164931, "rewards/frontier_coverage_1": -0.006392185157164931, "rewards/frontier_coverage_10": -0.006392185157164931, "rewards/frontier_coverage_15": -0.006392185157164931, "rewards/frontier_coverage_20": -0.006392185157164931, "rewards/frontier_coverage_25": -0.006392185157164931, "rewards/frontier_coverage_5": -0.006392185157164931, "rewards/true_frontier_ece_gap_only_reward": -0.1269455760717392, "signal/accuracy_reward/centered_abs_mean": 0.23853081464767456, "signal/accuracy_reward/group_std_mean": 0.2985024094581604, "signal/accuracy_reward/group_zero_std_frac": 0.21111111491918563, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11926540732383728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11926540732383728, "signal/advantage_abs_mean": 0.14833838045597075, "signal/advantage_pre_scale_abs_mean": 0.14833838045597075, "signal/advantage_pre_scale_std": 0.21972199380397797, "signal/advantage_std": 0.21972199380397797, "signal/brier_reward/centered_abs_mean": 0.17149352431297302, "signal/brier_reward/group_std_mean": 0.21699302196502684, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021436690539121627, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021436690539121627, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11525630950927734, "signal/confidence_uniqueness_reward/group_std_mean": 0.14727450013160706, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014407038688659668, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014407038688659668, "signal/format_reward/centered_abs_mean": 0.03138563372194767, "signal/format_reward/group_std_mean": 0.06032953634858131, "signal/format_reward/group_zero_std_frac": 0.7444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015692816860973836, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015692816860973836, "signal/frontier_aurc_reward/centered_abs_mean": 0.001896983222104609, "signal/frontier_aurc_reward/group_std_mean": 0.0030401549767702816, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9640362845384517e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9640362845384517e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_0/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_1/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_1/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_10/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_10/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_15/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_15/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_20/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_20/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_25/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_25/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_5/centered_abs_mean": 0.04802608713507652, "signal/frontier_coverage_5/group_std_mean": 0.06869390532374382, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007504076114855706, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007504076114855706, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.03984055146574974, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0505749449133873, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.004980068933218717, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.004980068933218717, "step": 30 }, { "calibration/aurc": 0.250599098148718, "calibration/batch_distribution_entropy": 0.6323867153697071, "calibration/batch_entropy_100bins": 0.42821853049324937, "calibration/batch_entropy_10bins": 0.6323867153697071, "calibration/batch_entropy_50bins": 0.5033903372089894, "calibration/batch_uniqueness": 0.6367832585438589, "calibration/buffer_distribution_entropy": 0.5053599848783283, "calibration/buffer_entropy_100bins": 0.45108017532774436, "calibration/buffer_entropy_10bins": 0.5053599848783283, "calibration/buffer_entropy_50bins": 0.5249452336686906, "calibration/confidence_entropy": 0.5795754506284359, "calibration/coverage@0%": 0.01114940403252757, "calibration/coverage@1%": 0.01114940403252757, "calibration/coverage@10%": 0.023336877784522418, "calibration/coverage@15%": 0.045042137534781396, "calibration/coverage@20%": 0.21714213102924745, "calibration/coverage@25%": 0.5910950020422562, "calibration/coverage@30%": 0.8375, "calibration/coverage@5%": 0.01114940403252757, "calibration/distribution_entropy_10": 0.6323867153697071, "calibration/distribution_entropy_100": 0.42821853049324937, "calibration/ece": 0.10070792051637412, "calibration/mean_confidence": 0.6918897482432823, "calibration/unique_confidence_per_question": 0.0375, "calibration/unique_confidences": 14.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017534722222222233, "completions/max_length": 4016.6, "completions/max_terminated_length": 4016.6, "completions/mean_length": 656.1796997070312, "completions/mean_terminated_length": 667.9824096679688, "completions/min_length": 0.0, "completions/min_terminated_length": 189.6, "epoch": 0.08399895001312484, "grad_norm": 0.0004635561490431428, "learning_rate": 4.166666666666667e-06, "loss": -0.0114, "num_tokens": 64082204.0, "reward": 0.9670976400375366, "reward_std": 0.17328265607357024, "rewards/accuracy_reward": 0.6295138835906983, "rewards/brier_reward": 0.7479526400566101, "rewards/confidence_uniqueness_reward": 0.6298671245574952, "rewards/format_reward": 0.9805555582046509, "rewards/frontier_aurc_reward": -0.002723962301388383, "rewards/frontier_coverage_0": -0.023209616425447166, "rewards/frontier_coverage_1": -0.023209616425447166, "rewards/frontier_coverage_10": -0.023209616425447166, "rewards/frontier_coverage_15": -0.023209616425447166, "rewards/frontier_coverage_20": -0.023209616425447166, "rewards/frontier_coverage_25": -0.023209616425447166, "rewards/frontier_coverage_5": -0.023209616425447166, "rewards/true_frontier_ece_gap_only_reward": -0.060667777061462404, "signal/accuracy_reward/centered_abs_mean": 0.21026475727558136, "signal/accuracy_reward/group_std_mean": 0.26806623935699464, "signal/accuracy_reward/group_zero_std_frac": 0.2722222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10513237863779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10513237863779068, "signal/advantage_abs_mean": 0.13171991258859633, "signal/advantage_pre_scale_abs_mean": 0.13171991258859633, "signal/advantage_pre_scale_std": 0.19843848645687104, "signal/advantage_std": 0.19843848645687104, "signal/brier_reward/centered_abs_mean": 0.12574937492609023, "signal/brier_reward/group_std_mean": 0.16387327909469604, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01571867186576128, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01571867186576128, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17439252138137817, "signal/confidence_uniqueness_reward/group_std_mean": 0.20724063515663146, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02179906517267227, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02179906517267227, "signal/format_reward/centered_abs_mean": 0.03100043386220932, "signal/format_reward/group_std_mean": 0.055044320225715634, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01550021693110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01550021693110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.0009950165753252805, "signal/frontier_aurc_reward/group_std_mean": 0.001629676064476371, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5547133989457508e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5547133989457508e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_0/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_1/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_1/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_10/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_10/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_15/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_15/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_20/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_20/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_25/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_25/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_5/centered_abs_mean": 0.07657658159732819, "signal/frontier_coverage_5/group_std_mean": 0.09882448017597198, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001196509087458253, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001196509087458253, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.030044597759842872, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04015489742159843, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003755574719980359, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003755574719980359, "step": 35 }, { "calibration/aurc": 0.33153256701336514, "calibration/batch_distribution_entropy": 0.5854273557012318, "calibration/batch_entropy_100bins": 0.4253124440379624, "calibration/batch_entropy_10bins": 0.5854273557012318, "calibration/batch_entropy_50bins": 0.4994252045234801, "calibration/batch_uniqueness": 0.6444065784609718, "calibration/buffer_distribution_entropy": 0.5848560770733751, "calibration/buffer_entropy_100bins": 0.4806630298602917, "calibration/buffer_entropy_10bins": 0.5848560770733751, "calibration/buffer_entropy_50bins": 0.5604947004501042, "calibration/confidence_entropy": 0.6171869593013805, "calibration/coverage@0%": 0.006288407488631675, "calibration/coverage@1%": 0.006288407488631675, "calibration/coverage@10%": 0.006288407488631675, "calibration/coverage@15%": 0.020387624198814444, "calibration/coverage@20%": 0.020387624198814444, "calibration/coverage@25%": 0.22147370367411084, "calibration/coverage@30%": 0.2721291791843428, "calibration/coverage@5%": 0.006288407488631675, "calibration/distribution_entropy_10": 0.5854273557012318, "calibration/distribution_entropy_100": 0.4253124440379624, "calibration/ece": 0.09023499731095522, "calibration/mean_confidence": 0.6499715924246285, "calibration/unique_confidence_per_question": 0.03958333333333333, "calibration/unique_confidences": 15.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012760416666666653, "completions/max_length": 3917.2, "completions/max_terminated_length": 3917.2, "completions/mean_length": 703.5712768554688, "completions/mean_terminated_length": 712.6527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.09599880001499982, "grad_norm": 0.0004154318303335458, "learning_rate": 4.761904761904762e-06, "loss": -0.0103, "num_tokens": 75306865.0, "reward": 0.9813725113868713, "reward_std": 0.15861513316631318, "rewards/accuracy_reward": 0.6460069417953491, "rewards/brier_reward": 0.7578154802322388, "rewards/confidence_uniqueness_reward": 0.629870867729187, "rewards/format_reward": 0.985156238079071, "rewards/frontier_aurc_reward": -0.002467139856889844, "rewards/frontier_coverage_0": -0.03592981658875942, "rewards/frontier_coverage_1": -0.03592981658875942, "rewards/frontier_coverage_10": -0.03592981658875942, "rewards/frontier_coverage_15": -0.03592981658875942, "rewards/frontier_coverage_20": -0.03592981658875942, "rewards/frontier_coverage_25": -0.03592981658875942, "rewards/frontier_coverage_5": -0.03592981658875942, "rewards/true_frontier_ece_gap_only_reward": -0.029612084105610847, "signal/accuracy_reward/centered_abs_mean": 0.19372829794883728, "signal/accuracy_reward/group_std_mean": 0.2543206661939621, "signal/accuracy_reward/group_zero_std_frac": 0.286111119389534, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09686414897441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09686414897441864, "signal/advantage_abs_mean": 0.11751253008842469, "signal/advantage_pre_scale_abs_mean": 0.11751253008842469, "signal/advantage_pre_scale_std": 0.186165389418602, "signal/advantage_std": 0.186165389418602, "signal/brier_reward/centered_abs_mean": 0.11083936840295791, "signal/brier_reward/group_std_mean": 0.14510888755321502, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013854921050369739, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013854921050369739, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1665905848145485, "signal/confidence_uniqueness_reward/group_std_mean": 0.1974548101425171, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020823823101818562, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020823823101818562, "signal/format_reward/centered_abs_mean": 0.02540690116584301, "signal/format_reward/group_std_mean": 0.04733345359563827, "signal/format_reward/group_zero_std_frac": 0.8055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012703450582921504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012703450582921504, "signal/frontier_aurc_reward/centered_abs_mean": 0.0008696626755408943, "signal/frontier_aurc_reward/group_std_mean": 0.0013225122122094036, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3588479305326473e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3588479305326473e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_0/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_1/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_1/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_10/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_10/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_15/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_15/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_20/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_20/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_25/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_25/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_5/centered_abs_mean": 0.09154021292924881, "signal/frontier_coverage_5/group_std_mean": 0.11824491173028946, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014303158270195127, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014303158270195127, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.02095247954130173, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.03012901544570923, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.002619059942662716, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.002619059942662716, "step": 40 }, { "calibration/aurc": 0.20306210885118028, "calibration/batch_distribution_entropy": 0.6887549737306013, "calibration/batch_entropy_100bins": 0.4745983011730009, "calibration/batch_entropy_10bins": 0.6887549737306013, "calibration/batch_entropy_50bins": 0.5543744512155419, "calibration/batch_uniqueness": 0.6833862362276909, "calibration/buffer_distribution_entropy": 0.6364236887118315, "calibration/buffer_entropy_100bins": 0.5046055763785582, "calibration/buffer_entropy_10bins": 0.6364236887118315, "calibration/buffer_entropy_50bins": 0.5889470060117332, "calibration/confidence_entropy": 0.5883243712866314, "calibration/coverage@0%": 0.014789383258954939, "calibration/coverage@1%": 0.014789383258954939, "calibration/coverage@10%": 0.057257313784553766, "calibration/coverage@15%": 0.1596987720979517, "calibration/coverage@20%": 0.4277598387998176, "calibration/coverage@25%": 0.8117097398897354, "calibration/coverage@30%": 0.9646739130434783, "calibration/coverage@5%": 0.014789383258954939, "calibration/distribution_entropy_10": 0.6887549737306013, "calibration/distribution_entropy_100": 0.4745983011730009, "calibration/ece": 0.07153040992310632, "calibration/mean_confidence": 0.6690040608217023, "calibration/unique_confidence_per_question": 0.06197916666666666, "calibration/unique_confidences": 23.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015104166666666651, "completions/max_length": 3774.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 735.8694580078125, "completions/mean_terminated_length": 747.207177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 239.4, "epoch": 0.1079986500168748, "grad_norm": 0.0004037077887915075, "learning_rate": 4.909638554216868e-06, "loss": -0.0114, "num_tokens": 86919345.0, "reward": 0.9962880134582519, "reward_std": 0.15157434940338135, "rewards/accuracy_reward": 0.659375, "rewards/brier_reward": 0.7679201841354371, "rewards/confidence_uniqueness_reward": 0.677590298652649, "rewards/format_reward": 0.9844618201255798, "rewards/frontier_aurc_reward": -0.002255662181414664, "rewards/frontier_coverage_0": -0.028669605404138564, "rewards/frontier_coverage_1": -0.028669605404138564, "rewards/frontier_coverage_10": -0.028669605404138564, "rewards/frontier_coverage_15": -0.028669605404138564, "rewards/frontier_coverage_20": -0.028669605404138564, "rewards/frontier_coverage_25": -0.028669605404138564, "rewards/frontier_coverage_5": -0.028669605404138564, "rewards/true_frontier_ece_gap_only_reward": -0.02518573999404907, "signal/accuracy_reward/centered_abs_mean": 0.1809027761220932, "signal/accuracy_reward/group_std_mean": 0.24004943072795867, "signal/accuracy_reward/group_zero_std_frac": 0.31666667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0904513880610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0904513880610466, "signal/advantage_abs_mean": 0.11121969670057297, "signal/advantage_pre_scale_abs_mean": 0.11121969670057297, "signal/advantage_pre_scale_std": 0.1811255246400833, "signal/advantage_std": 0.1811255246400833, "signal/brier_reward/centered_abs_mean": 0.11833977550268174, "signal/brier_reward/group_std_mean": 0.1528250217437744, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014792471937835217, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014792471937835217, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15492647886276245, "signal/confidence_uniqueness_reward/group_std_mean": 0.18688772320747377, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019365809857845306, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019365809857845306, "signal/format_reward/centered_abs_mean": 0.02518988773226738, "signal/format_reward/group_std_mean": 0.04387797862291336, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01259494386613369, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01259494386613369, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012202380341477693, "signal/frontier_aurc_reward/group_std_mean": 0.001992561621591449, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9066219283558895e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9066219283558895e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_0/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_1/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_1/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_10/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_10/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_15/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_15/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_20/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_20/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_25/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_25/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_5/centered_abs_mean": 0.09129920750856399, "signal/frontier_coverage_5/group_std_mean": 0.12056645601987839, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014265501173213123, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014265501173213123, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.021791164949536323, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.03154192678630352, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0027238956186920404, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0027238956186920404, "step": 45 }, { "calibration/aurc": 0.39036076960907307, "calibration/batch_distribution_entropy": 0.7862094401826989, "calibration/batch_entropy_100bins": 0.6110341298979247, "calibration/batch_entropy_10bins": 0.7862094401826989, "calibration/batch_entropy_50bins": 0.6934297857057619, "calibration/batch_uniqueness": 0.8247381911119701, "calibration/buffer_distribution_entropy": 0.6734957884749172, "calibration/buffer_entropy_100bins": 0.5282027696652414, "calibration/buffer_entropy_10bins": 0.6734957884749172, "calibration/buffer_entropy_50bins": 0.6156049219048293, "calibration/confidence_entropy": 0.5949417037140304, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.003655352480417755, "calibration/coverage@20%": 0.03593048475555003, "calibration/coverage@25%": 0.0391050879301532, "calibration/coverage@30%": 0.2087096269843971, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.7862094401826989, "calibration/distribution_entropy_100": 0.6110341298979247, "calibration/ece": 0.12655827505964012, "calibration/mean_confidence": 0.6225528034108609, "calibration/unique_confidence_per_question": 0.10677083333333334, "calibration/unique_confidences": 41.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666652, "completions/max_length": 3659.4, "completions/max_terminated_length": 3659.4, "completions/mean_length": 737.67744140625, "completions/mean_terminated_length": 746.5686889648438, "completions/min_length": 0.0, "completions/min_terminated_length": 213.2, "epoch": 0.11999850001874976, "grad_norm": 0.0004018676117993891, "learning_rate": 4.759036144578314e-06, "loss": -0.0112, "num_tokens": 98514989.0, "reward": 1.000908660888672, "reward_std": 0.14541010558605194, "rewards/accuracy_reward": 0.6365451455116272, "rewards/brier_reward": 0.7578566431999206, "rewards/confidence_uniqueness_reward": 0.8036070704460144, "rewards/format_reward": 0.9878472208976745, "rewards/frontier_aurc_reward": -0.0020980457309633495, "rewards/frontier_coverage_0": -0.028162200190126895, "rewards/frontier_coverage_1": -0.028162200190126895, "rewards/frontier_coverage_10": -0.028162200190126895, "rewards/frontier_coverage_15": -0.028162200190126895, "rewards/frontier_coverage_20": -0.028162200190126895, "rewards/frontier_coverage_25": -0.028162200190126895, "rewards/frontier_coverage_5": -0.028162200190126895, "rewards/true_frontier_ece_gap_only_reward": -0.026859960705041885, "signal/accuracy_reward/centered_abs_mean": 0.1797797292470932, "signal/accuracy_reward/group_std_mean": 0.2389494448900223, "signal/accuracy_reward/group_zero_std_frac": 0.3166666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0898898646235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0898898646235466, "signal/advantage_abs_mean": 0.105811907351017, "signal/advantage_pre_scale_abs_mean": 0.105811907351017, "signal/advantage_pre_scale_std": 0.1739170879125595, "signal/advantage_std": 0.1739170879125595, "signal/brier_reward/centered_abs_mean": 0.12420935779809952, "signal/brier_reward/group_std_mean": 0.16102492213249206, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01552616972476244, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01552616972476244, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10721739381551743, "signal/confidence_uniqueness_reward/group_std_mean": 0.13165029883384705, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013402174226939678, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013402174226939678, "signal/format_reward/centered_abs_mean": 0.02109375, "signal/format_reward/group_std_mean": 0.03988752476871014, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016509333858266474, "signal/frontier_aurc_reward/group_std_mean": 0.0027218869887292384, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5795834153541365e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5795834153541365e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_0/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_1/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_1/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_10/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_10/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_15/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_15/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_20/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_20/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_25/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_25/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_5/centered_abs_mean": 0.11604345738887786, "signal/frontier_coverage_5/group_std_mean": 0.1531293898820877, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018131790217012166, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018131790217012166, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.02609681598842144, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.03738295584917069, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00326210199855268, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00326210199855268, "step": 50 }, { "epoch": 0.11999850001874976, "eval_completions/clipped_ratio": 0.013888888888888895, "eval_completions/max_length": 1763.6666666666667, "eval_completions/max_terminated_length": 1763.6666666666667, "eval_completions/mean_length": 710.3668518066406, "eval_completions/mean_terminated_length": 720.3878885904948, "eval_completions/min_length": 65.33333333333333, "eval_completions/min_terminated_length": 283.0, "eval_loss": 0.0, "eval_num_tokens": 98514989.0, "eval_reward": 1.0152363975842793, "eval_reward_std": 0.25018754849831265, "eval_rewards/accuracy_reward": 0.671875, "eval_rewards/brier_reward": 0.7494580149650574, "eval_rewards/confidence_uniqueness_reward": 0.8206921716531118, "eval_rewards/format_reward": 0.984375, "eval_rewards/frontier_aurc_reward": -0.0020469005879325173, "eval_rewards/frontier_coverage_0": -0.05519990002115568, "eval_rewards/frontier_coverage_1": -0.05519990002115568, "eval_rewards/frontier_coverage_10": -0.05519990002115568, "eval_rewards/frontier_coverage_15": -0.05519990002115568, "eval_rewards/frontier_coverage_20": -0.05519990002115568, "eval_rewards/frontier_coverage_25": -0.05519990002115568, "eval_rewards/frontier_coverage_5": -0.05519990002115568, "eval_rewards/true_frontier_ece_gap_only_reward": -0.024703877978026867, "eval_runtime": 205.888, "eval_samples_per_second": 4.857, "eval_signal/accuracy_reward/centered_abs_mean": 0.4297960052887599, "eval_signal/accuracy_reward/group_std_mean": 0.4702196568250656, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21489800264437994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21489800264437994, "eval_signal/advantage_abs_mean": 0.21398618072271347, "eval_signal/advantage_pre_scale_abs_mean": 0.21398618072271347, "eval_signal/advantage_pre_scale_std": 0.2496974691748619, "eval_signal/advantage_std": 0.2496974691748619, "eval_signal/brier_reward/centered_abs_mean": 0.14907778551181158, "eval_signal/brier_reward/group_std_mean": 0.20007833590110144, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018634723188976448, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018634723188976448, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.09062439575791359, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12857400501767793, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011328049469739199, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011328049469739199, "eval_signal/format_reward/centered_abs_mean": 0.029947916977107525, "eval_signal/format_reward/group_std_mean": 0.0794201207657655, "eval_signal/format_reward/group_zero_std_frac": 0.5833333432674408, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014973958488553762, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.014973958488553762, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0015480444611360629, "eval_signal/frontier_aurc_reward/group_std_mean": 0.003234441547344128, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4188194705250982e-05, "eval_signal/frontier_aurc_reward/weight": 0.015625, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4188194705250982e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_0/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_0/weight": 0.015625, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_1/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_1/weight": 0.015625, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_10/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_10/weight": 0.015625, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_15/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_15/weight": 0.015625, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_20/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_20/weight": 0.015625, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_25/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_25/weight": 0.015625, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.20009989539782205, "eval_signal/frontier_coverage_5/group_std_mean": 0.26012368500232697, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/frontier_coverage_5/weight": 0.015625, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031265608655909696, "eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.025939644935230415, "eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04151128667096297, "eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003242455616903802, "eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003242455616903802, "eval_steps_per_second": 0.029, "step": 50 }, { "epoch": 0.11999850001874976, "step": 50, "train_probe_completions/clipped_ratio": 0.013715277777777776, "train_probe_completions/max_length": 2407.6666666666665, "train_probe_completions/max_terminated_length": 2407.6666666666665, "train_probe_completions/mean_length": 720.0277811686198, "train_probe_completions/mean_terminated_length": 730.0629069010416, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 217.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 98514989.0, "train_probe_reward": 1.0258092880249023, "train_probe_reward_std": 0.24107951919237772, "train_probe_rewards/accuracy_reward": 0.6814236144224802, "train_probe_rewards/brier_reward": 0.7728658020496368, "train_probe_rewards/confidence_uniqueness_reward": 0.825143297513326, "train_probe_rewards/format_reward": 0.9869791666666666, "train_probe_rewards/frontier_aurc_reward": -0.0015906431168938677, "train_probe_rewards/frontier_coverage_0": -0.041856971802189946, "train_probe_rewards/frontier_coverage_1": -0.041856971802189946, "train_probe_rewards/frontier_coverage_10": -0.041856971802189946, "train_probe_rewards/frontier_coverage_15": -0.041856971802189946, "train_probe_rewards/frontier_coverage_20": -0.041856971802189946, "train_probe_rewards/frontier_coverage_25": -0.041856971802189946, "train_probe_rewards/frontier_coverage_5": -0.041856971802189946, "train_probe_rewards/true_frontier_ece_gap_only_reward": -0.028322534635663033, "train_probe_runtime": 203.2167, "train_probe_samples_per_second": 4.921, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4200846354166667, "train_probe_signal/accuracy_reward/group_std_mean": 0.46454379459222156, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21004231770833334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.21004231770833334, "train_probe_signal/advantage_abs_mean": 0.20514148473739624, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20514148473739624, "train_probe_signal/advantage_pre_scale_std": 0.24087134500344595, "train_probe_signal/advantage_std": 0.24087134500344595, "train_probe_signal/brier_reward/centered_abs_mean": 0.1424630656838417, "train_probe_signal/brier_reward/group_std_mean": 0.189873273173968, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017807883210480213, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.017807883210480213, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.09026772528886795, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1251646839082241, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011283465661108494, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011283465661108494, "train_probe_signal/format_reward/centered_abs_mean": 0.025010850746184587, "train_probe_signal/format_reward/group_std_mean": 0.06767813768237829, "train_probe_signal/format_reward/group_zero_std_frac": 0.6388889054457346, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425373092294, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.012505425373092294, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0015710045505935948, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0031676616442079344, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.454694610302492e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.015625, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.454694610302492e-05, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_0/weight": 0.015625, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_1/weight": 0.015625, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_10/weight": 0.015625, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_15/weight": 0.015625, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_20/weight": 0.015625, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_25/weight": 0.015625, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2021650398770968, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.266250138481458, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/frontier_coverage_5/weight": 0.015625, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031588287480796375, "train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.02938245516270399, "train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04563416292270025, "train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003672806895337999, "train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003672806895337999, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.30551913172089973, "calibration/batch_distribution_entropy": 0.8602443760010654, "calibration/batch_entropy_100bins": 0.736749609443292, "calibration/batch_entropy_10bins": 0.8602443760010654, "calibration/batch_entropy_50bins": 0.8128175073562515, "calibration/batch_uniqueness": 0.9014400949713448, "calibration/buffer_distribution_entropy": 0.7210148263994508, "calibration/buffer_entropy_100bins": 0.5682523629363754, "calibration/buffer_entropy_10bins": 0.7210148263994508, "calibration/buffer_entropy_50bins": 0.6571883430263827, "calibration/confidence_entropy": 0.5935431137939096, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.062436579806646866, "calibration/coverage@20%": 0.27323636460432554, "calibration/coverage@25%": 0.43223015091863515, "calibration/coverage@30%": 0.5437253937007874, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.8602443760010654, "calibration/distribution_entropy_100": 0.736749609443292, "calibration/ece": 0.1438151896191343, "calibration/mean_confidence": 0.5757818524322698, "calibration/unique_confidence_per_question": 0.17395833333333335, "calibration/unique_confidences": 66.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014496527777777768, "completions/max_length": 3464.4, "completions/max_terminated_length": 3464.4, "completions/mean_length": 735.163037109375, "completions/mean_terminated_length": 746.039599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 199.2, "epoch": 0.13199835002062474, "grad_norm": 0.0003842144215013832, "learning_rate": 4.60843373493976e-06, "loss": -0.0115, "num_tokens": 110064643.0, "reward": 1.0145560026168823, "reward_std": 0.13707308173179628, "rewards/accuracy_reward": 0.649218738079071, "rewards/brier_reward": 0.7490468740463256, "rewards/confidence_uniqueness_reward": 0.8952165722846985, "rewards/format_reward": 0.9855034708976745, "rewards/frontier_aurc_reward": -0.001880918419919908, "rewards/frontier_coverage_0": -0.042861418426036836, "rewards/frontier_coverage_1": -0.042861418426036836, "rewards/frontier_coverage_10": -0.042861418426036836, "rewards/frontier_coverage_15": -0.042861418426036836, "rewards/frontier_coverage_20": -0.042861418426036836, "rewards/frontier_coverage_25": -0.042861418426036836, "rewards/frontier_coverage_5": -0.042861418426036836, "rewards/true_frontier_ece_gap_only_reward": -0.028965843096375465, "signal/accuracy_reward/centered_abs_mean": 0.17503797709941865, "signal/accuracy_reward/group_std_mean": 0.22645011842250823, "signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08751898854970933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08751898854970933, "signal/advantage_abs_mean": 0.10272245854139328, "signal/advantage_pre_scale_abs_mean": 0.10272245854139328, "signal/advantage_pre_scale_std": 0.16644595563411713, "signal/advantage_std": 0.16644595563411713, "signal/brier_reward/centered_abs_mean": 0.1416968137025833, "signal/brier_reward/group_std_mean": 0.1823040783405304, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017712101712822913, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017712101712822913, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06181478276848793, "signal/confidence_uniqueness_reward/group_std_mean": 0.08373434096574783, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0077268478460609915, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0077268478460609915, "signal/format_reward/centered_abs_mean": 0.022303602285683156, "signal/format_reward/group_std_mean": 0.03928558751940727, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011151801142841578, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011151801142841578, "signal/frontier_aurc_reward/centered_abs_mean": 0.001758693833835423, "signal/frontier_aurc_reward/group_std_mean": 0.0033450972754508258, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7479591153678484e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7479591153678484e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_0/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_1/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_1/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_10/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_10/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_15/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_15/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_20/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_20/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_25/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_25/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_5/centered_abs_mean": 0.16558919548988343, "signal/frontier_coverage_5/group_std_mean": 0.21207553446292876, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025873311795294287, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025873311795294287, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.026407453045248986, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.038250190764665605, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0033009316306561232, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0033009316306561232, "step": 55 }, { "calibration/aurc": 0.3289672986883583, "calibration/batch_distribution_entropy": 0.8569993905562576, "calibration/batch_entropy_100bins": 0.7871838425661777, "calibration/batch_entropy_10bins": 0.8569993905562576, "calibration/batch_entropy_50bins": 0.8397181934509937, "calibration/batch_uniqueness": 0.9215534730345482, "calibration/buffer_distribution_entropy": 0.7594609879037215, "calibration/buffer_entropy_100bins": 0.6073773228204736, "calibration/buffer_entropy_10bins": 0.7594609879037215, "calibration/buffer_entropy_50bins": 0.6953888937595927, "calibration/confidence_entropy": 0.5982193870283902, "calibration/coverage@0%": 0.004736842105263158, "calibration/coverage@1%": 0.004736842105263158, "calibration/coverage@10%": 0.090082667401488, "calibration/coverage@15%": 0.20160099200881784, "calibration/coverage@20%": 0.3591953706255167, "calibration/coverage@25%": 0.4010801873794434, "calibration/coverage@30%": 0.47177459355194273, "calibration/coverage@5%": 0.004736842105263158, "calibration/distribution_entropy_10": 0.8569993905562576, "calibration/distribution_entropy_100": 0.7871838425661777, "calibration/ece": 0.19379714223757522, "calibration/mean_confidence": 0.5902312308233718, "calibration/unique_confidence_per_question": 0.2088541666666667, "calibration/unique_confidences": 80.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01519097222222221, "completions/max_length": 3634.8, "completions/max_terminated_length": 3634.8, "completions/mean_length": 732.9686767578125, "completions/mean_terminated_length": 744.3640380859375, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.14399820002249972, "grad_norm": 0.0004008902469649911, "learning_rate": 4.457831325301205e-06, "loss": -0.0135, "num_tokens": 121605018.0, "reward": 1.0072944164276123, "reward_std": 0.15234946310520173, "rewards/accuracy_reward": 0.6269965291023254, "rewards/brier_reward": 0.7545630693435669, "rewards/confidence_uniqueness_reward": 0.9078129887580871, "rewards/format_reward": 0.9844618082046509, "rewards/frontier_aurc_reward": -0.0017762274481356144, "rewards/frontier_coverage_0": -0.019636033568531275, "rewards/frontier_coverage_1": -0.019636033568531275, "rewards/frontier_coverage_10": -0.019636033568531275, "rewards/frontier_coverage_15": -0.019636033568531275, "rewards/frontier_coverage_20": -0.019636033568531275, "rewards/frontier_coverage_25": -0.019636033568531275, "rewards/frontier_coverage_5": -0.019636033568531275, "rewards/true_frontier_ece_gap_only_reward": -0.032450299337506296, "signal/accuracy_reward/centered_abs_mean": 0.19198676347732543, "signal/accuracy_reward/group_std_mean": 0.24801050424575805, "signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09599338173866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09599338173866272, "signal/advantage_abs_mean": 0.11215932667255402, "signal/advantage_pre_scale_abs_mean": 0.11215932667255402, "signal/advantage_pre_scale_std": 0.17976947426795958, "signal/advantage_std": 0.17976947426795958, "signal/brier_reward/centered_abs_mean": 0.14585065245628356, "signal/brier_reward/group_std_mean": 0.18760787844657897, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018231331557035445, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018231331557035445, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06241054162383079, "signal/confidence_uniqueness_reward/group_std_mean": 0.09066012054681778, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007801317702978849, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007801317702978849, "signal/format_reward/centered_abs_mean": 0.025927734375, "signal/format_reward/group_std_mean": 0.05031422972679138, "signal/format_reward/group_zero_std_frac": 0.7861111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0129638671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0129638671875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002507622819393873, "signal/frontier_aurc_reward/group_std_mean": 0.004252730589359999, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.918160655302927e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.918160655302927e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_0/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_1/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_1/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_10/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_10/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_15/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_15/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_20/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_20/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_25/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_25/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_5/centered_abs_mean": 0.15634405016899108, "signal/frontier_coverage_5/group_std_mean": 0.20499549806118011, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024428757838904857, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024428757838904857, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.029747573658823967, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04069453105330467, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003718446707352996, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003718446707352996, "step": 60 }, { "calibration/aurc": 0.27763096415418204, "calibration/batch_distribution_entropy": 0.8015767599827139, "calibration/batch_entropy_100bins": 0.7749875634194134, "calibration/batch_entropy_10bins": 0.8015767599827139, "calibration/batch_entropy_50bins": 0.8129389718529344, "calibration/batch_uniqueness": 0.9117152414913241, "calibration/buffer_distribution_entropy": 0.7814829282050988, "calibration/buffer_entropy_100bins": 0.6385089459762243, "calibration/buffer_entropy_10bins": 0.7814829282050988, "calibration/buffer_entropy_50bins": 0.7230478450681841, "calibration/confidence_entropy": 0.5883703123093914, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.005774278215223097, "calibration/coverage@15%": 0.2747384205246596, "calibration/coverage@20%": 0.5393372634703384, "calibration/coverage@25%": 0.6022603839441535, "calibration/coverage@30%": 0.7034666666666667, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.8015767599827139, "calibration/distribution_entropy_100": 0.7749875634194134, "calibration/ece": 0.13966504962011914, "calibration/mean_confidence": 0.639796467979542, "calibration/unique_confidence_per_question": 0.18958333333333333, "calibration/unique_confidences": 72.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013368055555555581, "completions/max_length": 3414.8, "completions/max_terminated_length": 3414.8, "completions/mean_length": 708.5317016601563, "completions/mean_terminated_length": 718.0807495117188, "completions/min_length": 0.0, "completions/min_terminated_length": 209.4, "epoch": 0.1559980500243747, "grad_norm": 0.0004595128120854497, "learning_rate": 4.307228915662651e-06, "loss": -0.0113, "num_tokens": 132861351.0, "reward": 1.0280081152915954, "reward_std": 0.13622777462005614, "rewards/accuracy_reward": 0.6611979246139527, "rewards/brier_reward": 0.7732649683952332, "rewards/confidence_uniqueness_reward": 0.9014915823936462, "rewards/format_reward": 0.9864583373069763, "rewards/frontier_aurc_reward": -0.0018865561811253428, "rewards/frontier_coverage_0": -0.024941197596490383, "rewards/frontier_coverage_1": -0.024941197596490383, "rewards/frontier_coverage_10": -0.024941197596490383, "rewards/frontier_coverage_15": -0.024941197596490383, "rewards/frontier_coverage_20": -0.024941197596490383, "rewards/frontier_coverage_25": -0.024941197596490383, "rewards/frontier_coverage_5": -0.024941197596490383, "rewards/true_frontier_ece_gap_only_reward": -0.019257388636469842, "signal/accuracy_reward/centered_abs_mean": 0.16076931357383728, "signal/accuracy_reward/group_std_mean": 0.21609613299369812, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08038465678691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08038465678691864, "signal/advantage_abs_mean": 0.09798353910446167, "signal/advantage_pre_scale_abs_mean": 0.09798353910446167, "signal/advantage_pre_scale_std": 0.1702731281518936, "signal/advantage_std": 0.1702731281518936, "signal/brier_reward/centered_abs_mean": 0.11966974288225174, "signal/brier_reward/group_std_mean": 0.1555977314710617, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014958717860281467, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014958717860281467, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07107813656330109, "signal/confidence_uniqueness_reward/group_std_mean": 0.09667231291532516, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008884767070412636, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008884767070412636, "signal/format_reward/centered_abs_mean": 0.02330729179084301, "signal/format_reward/group_std_mean": 0.043983825296163556, "signal/format_reward/group_zero_std_frac": 0.8222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011653645895421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011653645895421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014523085206747054, "signal/frontier_aurc_reward/group_std_mean": 0.0024229245027527213, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2692320635542272e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2692320635542272e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_0/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_1/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_1/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_10/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_10/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_15/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_15/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_20/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_20/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_25/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_25/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_5/centered_abs_mean": 0.10854218900203705, "signal/frontier_coverage_5/group_std_mean": 0.14894305765628815, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016959717031568289, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016959717031568289, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0190825667232275, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.02821722887456417, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0023853208404034376, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0023853208404034376, "step": 65 }, { "calibration/aurc": 0.2942477916112748, "calibration/batch_distribution_entropy": 0.7283107979539343, "calibration/batch_entropy_100bins": 0.7784214972740136, "calibration/batch_entropy_10bins": 0.7283107979539343, "calibration/batch_entropy_50bins": 0.7914896701898086, "calibration/batch_uniqueness": 0.9236069702763444, "calibration/buffer_distribution_entropy": 0.794101813716716, "calibration/buffer_entropy_100bins": 0.6672103969274696, "calibration/buffer_entropy_10bins": 0.794101813716716, "calibration/buffer_entropy_50bins": 0.746071032044594, "calibration/confidence_entropy": 0.5846905333336511, "calibration/coverage@0%": 0.0199668754084534, "calibration/coverage@1%": 0.0199668754084534, "calibration/coverage@10%": 0.05303774155018569, "calibration/coverage@15%": 0.07666255160682335, "calibration/coverage@20%": 0.23386797400915085, "calibration/coverage@25%": 0.3064000917049651, "calibration/coverage@30%": 0.49787412509417434, "calibration/coverage@5%": 0.0330902349885059, "calibration/distribution_entropy_10": 0.7283107979539343, "calibration/distribution_entropy_100": 0.7784214972740136, "calibration/ece": 0.091677989362732, "calibration/mean_confidence": 0.6715764005231176, "calibration/unique_confidence_per_question": 0.171875, "calibration/unique_confidences": 66.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010156250000000023, "completions/max_length": 3671.2, "completions/max_terminated_length": 3671.2, "completions/mean_length": 689.2970703125, "completions/mean_terminated_length": 696.3696044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.16799790002624967, "grad_norm": 0.00040550867561250925, "learning_rate": 4.156626506024097e-06, "loss": -0.0087, "num_tokens": 143880197.0, "reward": 1.0235817909240723, "reward_std": 0.13608680069446563, "rewards/accuracy_reward": 0.6411458253860474, "rewards/brier_reward": 0.771563458442688, "rewards/confidence_uniqueness_reward": 0.9243876576423645, "rewards/format_reward": 0.9894965291023254, "rewards/frontier_aurc_reward": -0.0020083141047507525, "rewards/frontier_coverage_0": -0.014133398490957915, "rewards/frontier_coverage_1": -0.014133398490957915, "rewards/frontier_coverage_10": -0.014133398490957915, "rewards/frontier_coverage_15": -0.014133398490957915, "rewards/frontier_coverage_20": -0.014133398490957915, "rewards/frontier_coverage_25": -0.014133398490957915, "rewards/frontier_coverage_5": -0.014133398490957915, "rewards/true_frontier_ece_gap_only_reward": -0.017248846217989923, "signal/accuracy_reward/centered_abs_mean": 0.1690972238779068, "signal/accuracy_reward/group_std_mean": 0.2230025738477707, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0845486119389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0845486119389534, "signal/advantage_abs_mean": 0.09818485230207444, "signal/advantage_pre_scale_abs_mean": 0.09818485230207444, "signal/advantage_pre_scale_std": 0.16889992356300354, "signal/advantage_std": 0.16889992356300354, "signal/brier_reward/centered_abs_mean": 0.11540952920913697, "signal/brier_reward/group_std_mean": 0.1515179991722107, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014426191151142121, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014426191151142121, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047419081628322604, "signal/confidence_uniqueness_reward/group_std_mean": 0.07018115222454072, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059273852035403255, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059273852035403255, "signal/format_reward/centered_abs_mean": 0.01873372420668602, "signal/format_reward/group_std_mean": 0.03724060095846653, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00936686210334301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00936686210334301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013382966397330164, "signal/frontier_aurc_reward/group_std_mean": 0.0020241386722773314, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0910884995828382e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0910884995828382e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_0/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_1/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_1/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_10/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_10/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_15/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_15/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_20/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_20/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_25/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_25/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_5/centered_abs_mean": 0.10258018672466278, "signal/frontier_coverage_5/group_std_mean": 0.14305810928344725, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001602815417572856, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001602815417572856, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.016778473183512686, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.023937665671110154, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.002097309147939086, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.002097309147939086, "step": 70 }, { "calibration/aurc": 0.26809441942504275, "calibration/batch_distribution_entropy": 0.6838393931393899, "calibration/batch_entropy_100bins": 0.7729783643288141, "calibration/batch_entropy_10bins": 0.6838393931393899, "calibration/batch_entropy_50bins": 0.7797124119634354, "calibration/batch_uniqueness": 0.9257592753961467, "calibration/buffer_distribution_entropy": 0.8014343512506427, "calibration/buffer_entropy_100bins": 0.6965847069678281, "calibration/buffer_entropy_10bins": 0.8014343512506427, "calibration/buffer_entropy_50bins": 0.7671058988973287, "calibration/confidence_entropy": 0.5651903684955479, "calibration/coverage@0%": 0.007869979733363341, "calibration/coverage@1%": 0.007869979733363341, "calibration/coverage@10%": 0.007869979733363341, "calibration/coverage@15%": 0.22780781645114204, "calibration/coverage@20%": 0.257160283163038, "calibration/coverage@25%": 0.46021289135147203, "calibration/coverage@30%": 0.6366343893697362, "calibration/coverage@5%": 0.007869979733363341, "calibration/distribution_entropy_10": 0.6838393931393899, "calibration/distribution_entropy_100": 0.7729783643288141, "calibration/ece": 0.11109141408517027, "calibration/mean_confidence": 0.7044527597089597, "calibration/unique_confidence_per_question": 0.16354166666666664, "calibration/unique_confidences": 62.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00703125, "completions/max_length": 3003.8, "completions/max_terminated_length": 3003.8, "completions/mean_length": 662.574658203125, "completions/mean_terminated_length": 667.2730590820313, "completions/min_length": 0.0, "completions/min_terminated_length": 191.8, "epoch": 0.17999775002812465, "grad_norm": 0.000408834166591987, "learning_rate": 4.006024096385543e-06, "loss": -0.0048, "num_tokens": 154577953.0, "reward": 1.052880334854126, "reward_std": 0.13121603578329086, "rewards/accuracy_reward": 0.6934027791023254, "rewards/brier_reward": 0.7911527037620545, "rewards/confidence_uniqueness_reward": 0.9237256646156311, "rewards/format_reward": 0.9928819537162781, "rewards/frontier_aurc_reward": -0.001983778248541057, "rewards/frontier_coverage_0": -0.02244817279279232, "rewards/frontier_coverage_1": -0.02244817279279232, "rewards/frontier_coverage_10": -0.02244817279279232, "rewards/frontier_coverage_15": -0.02244817279279232, "rewards/frontier_coverage_20": -0.02244817279279232, "rewards/frontier_coverage_25": -0.02244817279279232, "rewards/frontier_coverage_5": -0.02244817279279232, "rewards/true_frontier_ece_gap_only_reward": -0.01708451323211193, "signal/accuracy_reward/centered_abs_mean": 0.17001952826976777, "signal/accuracy_reward/group_std_mean": 0.22074966430664061, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08500976413488388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08500976413488388, "signal/advantage_abs_mean": 0.09748768210411071, "signal/advantage_pre_scale_abs_mean": 0.09748768210411071, "signal/advantage_pre_scale_std": 0.16811644434928893, "signal/advantage_std": 0.16811644434928893, "signal/brier_reward/centered_abs_mean": 0.110137939453125, "signal/brier_reward/group_std_mean": 0.14241448044776917, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013767242431640625, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013767242431640625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0404249906539917, "signal/confidence_uniqueness_reward/group_std_mean": 0.05972475409507751, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005053123831748963, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005053123831748963, "signal/format_reward/centered_abs_mean": 0.012988281436264515, "signal/format_reward/group_std_mean": 0.02832689881324768, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0064941407181322575, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0064941407181322575, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014353625476360321, "signal/frontier_aurc_reward/group_std_mean": 0.002100939303636551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2427539806813002e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2427539806813002e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_0/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_1/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_1/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_10/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_10/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_15/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_15/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_20/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_20/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_25/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_25/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_5/centered_abs_mean": 0.07951787561178207, "signal/frontier_coverage_5/group_std_mean": 0.1129288211464882, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012424668064340949, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012424668064340949, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.016805017180740834, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.023270204663276672, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0021006271475926042, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0021006271475926042, "step": 75 }, { "calibration/aurc": 0.21834242019243638, "calibration/batch_distribution_entropy": 0.6654728425240195, "calibration/batch_entropy_100bins": 0.7709733639402805, "calibration/batch_entropy_10bins": 0.6654728425240195, "calibration/batch_entropy_50bins": 0.7749415689063472, "calibration/batch_uniqueness": 0.9253690248872815, "calibration/buffer_distribution_entropy": 0.8056100350450295, "calibration/buffer_entropy_100bins": 0.7215675673965605, "calibration/buffer_entropy_10bins": 0.8056100350450295, "calibration/buffer_entropy_50bins": 0.7838614791193015, "calibration/confidence_entropy": 0.545574600148968, "calibration/coverage@0%": 0.02220438511574777, "calibration/coverage@1%": 0.02220438511574777, "calibration/coverage@10%": 0.23966102127612107, "calibration/coverage@15%": 0.34606994679725095, "calibration/coverage@20%": 0.5504157493778901, "calibration/coverage@25%": 0.6507980412356587, "calibration/coverage@30%": 0.7648198585964071, "calibration/coverage@5%": 0.07932105432846319, "calibration/distribution_entropy_10": 0.6654728425240195, "calibration/distribution_entropy_100": 0.7709733639402805, "calibration/ece": 0.12195687052074375, "calibration/mean_confidence": 0.7164405583247829, "calibration/unique_confidence_per_question": 0.1703125, "calibration/unique_confidences": 65.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01137152777777779, "completions/max_length": 3956.8, "completions/max_terminated_length": 3956.8, "completions/mean_length": 682.140283203125, "completions/mean_terminated_length": 690.0593139648438, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.19199760002999963, "grad_norm": 0.0004445287340786308, "learning_rate": 3.855421686746989e-06, "loss": -0.0085, "num_tokens": 165489489.0, "reward": 1.0320314049720765, "reward_std": 0.1364010527729988, "rewards/accuracy_reward": 0.6573784589767456, "rewards/brier_reward": 0.7777002453804016, "rewards/confidence_uniqueness_reward": 0.919914448261261, "rewards/format_reward": 0.9884548544883728, "rewards/frontier_aurc_reward": -0.002187199471518397, "rewards/frontier_coverage_0": -0.009425394237041473, "rewards/frontier_coverage_1": -0.009425394237041473, "rewards/frontier_coverage_10": -0.009425394237041473, "rewards/frontier_coverage_15": -0.009425394237041473, "rewards/frontier_coverage_20": -0.009425394237041473, "rewards/frontier_coverage_25": -0.009425394237041473, "rewards/frontier_coverage_5": -0.009425394237041473, "rewards/true_frontier_ece_gap_only_reward": -0.016176528483629226, "signal/accuracy_reward/centered_abs_mean": 0.16724717915058135, "signal/accuracy_reward/group_std_mean": 0.22416210770606995, "signal/accuracy_reward/group_zero_std_frac": 0.35, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08362358957529067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08362358957529067, "signal/advantage_abs_mean": 0.09906900972127915, "signal/advantage_pre_scale_abs_mean": 0.09906900972127915, "signal/advantage_pre_scale_std": 0.16992701590061188, "signal/advantage_std": 0.16992701590061188, "signal/brier_reward/centered_abs_mean": 0.11396953910589218, "signal/brier_reward/group_std_mean": 0.14948717951774598, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014246192388236522, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014246192388236522, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.041234496235847476, "signal/confidence_uniqueness_reward/group_std_mean": 0.06095789596438408, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0051543120294809345, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0051543120294809345, "signal/format_reward/centered_abs_mean": 0.01700846329331398, "signal/format_reward/group_std_mean": 0.03214373588562012, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00850423164665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00850423164665699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015931333182379603, "signal/frontier_aurc_reward/group_std_mean": 0.0023470679763704537, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.489270809746813e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.489270809746813e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_0/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_1/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_1/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_10/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_10/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_15/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_15/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_20/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_20/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_25/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_25/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_5/centered_abs_mean": 0.07731934040784835, "signal/frontier_coverage_5/group_std_mean": 0.10898190438747406, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012081146938726305, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012081146938726305, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.015519179962575435, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.021731919422745705, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0019398974953219294, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0019398974953219294, "step": 80 }, { "calibration/aurc": 0.223200967338523, "calibration/batch_distribution_entropy": 0.7470308632593083, "calibration/batch_entropy_100bins": 0.8097957063944881, "calibration/batch_entropy_10bins": 0.7470308632593083, "calibration/batch_entropy_50bins": 0.8235600550504187, "calibration/batch_uniqueness": 0.9389559027110078, "calibration/buffer_distribution_entropy": 0.810030947507659, "calibration/buffer_entropy_100bins": 0.7424741081330337, "calibration/buffer_entropy_10bins": 0.810030947507659, "calibration/buffer_entropy_50bins": 0.7981797753927561, "calibration/confidence_entropy": 0.5852915959437024, "calibration/coverage@0%": 0.009550042580097244, "calibration/coverage@1%": 0.009550042580097244, "calibration/coverage@10%": 0.13673516904381933, "calibration/coverage@15%": 0.25685554970229674, "calibration/coverage@20%": 0.41418009626638747, "calibration/coverage@25%": 0.6514727170160993, "calibration/coverage@30%": 0.8271656731884673, "calibration/coverage@5%": 0.04056608536084591, "calibration/distribution_entropy_10": 0.7470308632593083, "calibration/distribution_entropy_100": 0.8097957063944881, "calibration/ece": 0.09254755119264485, "calibration/mean_confidence": 0.6642736722712816, "calibration/unique_confidence_per_question": 0.1880208333333333, "calibration/unique_confidences": 72.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008159722222222231, "completions/max_length": 3769.2, "completions/max_terminated_length": 3769.2, "completions/mean_length": 685.8019897460938, "completions/mean_terminated_length": 691.5063598632812, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.2039974500318746, "grad_norm": 0.00041797629091888666, "learning_rate": 3.7048192771084342e-06, "loss": -0.0068, "num_tokens": 176477128.0, "reward": 1.0475164890289306, "reward_std": 0.1313488855957985, "rewards/accuracy_reward": 0.6796006917953491, "rewards/brier_reward": 0.7933130860328674, "rewards/confidence_uniqueness_reward": 0.9283158540725708, "rewards/format_reward": 0.9916666746139526, "rewards/frontier_aurc_reward": -0.0017628843430429696, "rewards/frontier_coverage_0": -0.01574636101722717, "rewards/frontier_coverage_1": -0.01574636101722717, "rewards/frontier_coverage_10": -0.01574636101722717, "rewards/frontier_coverage_15": -0.01574636101722717, "rewards/frontier_coverage_20": -0.01574636101722717, "rewards/frontier_coverage_25": -0.01574636101722717, "rewards/frontier_coverage_5": -0.01574636101722717, "rewards/true_frontier_ece_gap_only_reward": -0.01256832219660282, "signal/accuracy_reward/centered_abs_mean": 0.1685926616191864, "signal/accuracy_reward/group_std_mean": 0.22146643698215485, "signal/accuracy_reward/group_zero_std_frac": 0.36666667461395264, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0842963308095932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0842963308095932, "signal/advantage_abs_mean": 0.096477310359478, "signal/advantage_pre_scale_abs_mean": 0.096477310359478, "signal/advantage_pre_scale_std": 0.16670409142971038, "signal/advantage_std": 0.16670409142971038, "signal/brier_reward/centered_abs_mean": 0.10519883632659913, "signal/brier_reward/group_std_mean": 0.13862771689891815, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01314985454082489, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01314985454082489, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037385327741503716, "signal/confidence_uniqueness_reward/group_std_mean": 0.058187781274318694, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046731659676879644, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046731659676879644, "signal/format_reward/centered_abs_mean": 0.014822048507630825, "signal/format_reward/group_std_mean": 0.03110768012702465, "signal/format_reward/group_zero_std_frac": 0.8583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007411024253815413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007411024253815413, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013182483380660415, "signal/frontier_aurc_reward/group_std_mean": 0.0019752333406358956, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0597630282281898e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0597630282281898e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_0/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_1/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_1/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_10/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_10/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_15/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_15/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_20/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_20/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_25/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_25/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_5/centered_abs_mean": 0.08810206353664399, "signal/frontier_coverage_5/group_std_mean": 0.1211852788925171, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013765947427600623, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013765947427600623, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.01288688350468874, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.019262754917144777, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0016108604380860926, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0016108604380860926, "step": 85 }, { "calibration/aurc": 0.1750125348445183, "calibration/batch_distribution_entropy": 0.7160344664199889, "calibration/batch_entropy_100bins": 0.791538051421172, "calibration/batch_entropy_10bins": 0.7160344664199889, "calibration/batch_entropy_50bins": 0.803385649917867, "calibration/batch_uniqueness": 0.9360382132701994, "calibration/buffer_distribution_entropy": 0.8130252160812972, "calibration/buffer_entropy_100bins": 0.7592675172132644, "calibration/buffer_entropy_10bins": 0.8130252160812972, "calibration/buffer_entropy_50bins": 0.8095419172681041, "calibration/confidence_entropy": 0.5752702254035462, "calibration/coverage@0%": 0.021076727388243136, "calibration/coverage@1%": 0.021076727388243136, "calibration/coverage@10%": 0.28257062422924945, "calibration/coverage@15%": 0.46590979360010365, "calibration/coverage@20%": 0.6646229888828084, "calibration/coverage@25%": 0.8164021164021165, "calibration/coverage@30%": 0.8899470899470898, "calibration/coverage@5%": 0.05624733106278381, "calibration/distribution_entropy_10": 0.7160344664199889, "calibration/distribution_entropy_100": 0.791538051421172, "calibration/ece": 0.07857751360789, "calibration/mean_confidence": 0.6838434656307941, "calibration/unique_confidence_per_question": 0.16197916666666667, "calibration/unique_confidences": 62.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011892361111111116, "completions/max_length": 2865.4, "completions/max_terminated_length": 2865.4, "completions/mean_length": 644.41875, "completions/mean_terminated_length": 652.1775756835938, "completions/min_length": 0.0, "completions/min_terminated_length": 177.6, "epoch": 0.2159973000337496, "grad_norm": 0.0004885443486273289, "learning_rate": 3.5542168674698798e-06, "loss": -0.01, "num_tokens": 186969504.0, "reward": 1.0422258853912354, "reward_std": 0.1346314489841461, "rewards/accuracy_reward": 0.6743055701255798, "rewards/brier_reward": 0.7906572461128235, "rewards/confidence_uniqueness_reward": 0.9217118740081787, "rewards/format_reward": 0.9880208373069763, "rewards/frontier_aurc_reward": -0.001758960704319179, "rewards/frontier_coverage_0": -0.01478583961725235, "rewards/frontier_coverage_1": -0.01478583961725235, "rewards/frontier_coverage_10": -0.01478583961725235, "rewards/frontier_coverage_15": -0.01478583961725235, "rewards/frontier_coverage_20": -0.01478583961725235, "rewards/frontier_coverage_25": -0.01478583961725235, "rewards/frontier_coverage_5": -0.01478583961725235, "rewards/true_frontier_ece_gap_only_reward": -0.01070992909371853, "signal/accuracy_reward/centered_abs_mean": 0.16719835251569748, "signal/accuracy_reward/group_std_mean": 0.2174463987350464, "signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08359917625784874, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08359917625784874, "signal/advantage_abs_mean": 0.0994023248553276, "signal/advantage_pre_scale_abs_mean": 0.0994023248553276, "signal/advantage_pre_scale_std": 0.1722485601902008, "signal/advantage_std": 0.1722485601902008, "signal/brier_reward/centered_abs_mean": 0.10387470126152039, "signal/brier_reward/group_std_mean": 0.13654259741306304, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012984337657690049, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012984337657690049, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.043320811539888385, "signal/confidence_uniqueness_reward/group_std_mean": 0.06586214751005173, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005415101442486048, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005415101442486048, "signal/format_reward/centered_abs_mean": 0.02032335065305233, "signal/format_reward/group_std_mean": 0.03864026740193367, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010161675326526166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010161675326526166, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012993402313441038, "signal/frontier_aurc_reward/group_std_mean": 0.0018939806381240488, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.030219111475162e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.030219111475162e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_0/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_1/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_1/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_10/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_10/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_15/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_15/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_20/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_20/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_25/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_25/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_5/centered_abs_mean": 0.07952361851930619, "signal/frontier_coverage_5/group_std_mean": 0.10869008004665374, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012425565393641592, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012425565393641592, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.010778117179870605, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.01537728812545538, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0013472646474838256, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0013472646474838256, "step": 90 }, { "calibration/aurc": 0.20718600424081587, "calibration/batch_distribution_entropy": 0.7145419993415512, "calibration/batch_entropy_100bins": 0.7940045869247137, "calibration/batch_entropy_10bins": 0.7145419993415512, "calibration/batch_entropy_50bins": 0.8040417294748641, "calibration/batch_uniqueness": 0.9367676324025431, "calibration/buffer_distribution_entropy": 0.8145125153648216, "calibration/buffer_entropy_100bins": 0.7725883097693098, "calibration/buffer_entropy_10bins": 0.8145125153648216, "calibration/buffer_entropy_50bins": 0.817964486415557, "calibration/confidence_entropy": 0.5770644535474301, "calibration/coverage@0%": 0.018359180375690363, "calibration/coverage@1%": 0.018359180375690363, "calibration/coverage@10%": 0.3142551794049478, "calibration/coverage@15%": 0.47292650936860287, "calibration/coverage@20%": 0.5374353404763397, "calibration/coverage@25%": 0.6198096590711307, "calibration/coverage@30%": 0.70205074477485, "calibration/coverage@5%": 0.15136133782852185, "calibration/distribution_entropy_10": 0.7145419993415512, "calibration/distribution_entropy_100": 0.7940045869247137, "calibration/ece": 0.12296266760467423, "calibration/mean_confidence": 0.6829029134911659, "calibration/unique_confidence_per_question": 0.15885416666666669, "calibration/unique_confidences": 61.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00824652777777779, "completions/max_length": 3075.4, "completions/max_terminated_length": 3075.4, "completions/mean_length": 649.2624145507813, "completions/mean_terminated_length": 654.6318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.22799715003562457, "grad_norm": 0.00040514481952413917, "learning_rate": 3.4036144578313257e-06, "loss": -0.0068, "num_tokens": 197540687.0, "reward": 1.0414008617401123, "reward_std": 0.12309487164020538, "rewards/accuracy_reward": 0.6670138955116272, "rewards/brier_reward": 0.7899128198623657, "rewards/confidence_uniqueness_reward": 0.926601231098175, "rewards/format_reward": 0.9917534589767456, "rewards/frontier_aurc_reward": -0.0018277077469974756, "rewards/frontier_coverage_0": -0.012428297474980355, "rewards/frontier_coverage_1": -0.012428297474980355, "rewards/frontier_coverage_10": -0.012428297474980355, "rewards/frontier_coverage_15": -0.012428297474980355, "rewards/frontier_coverage_20": -0.012428297474980355, "rewards/frontier_coverage_25": -0.012428297474980355, "rewards/frontier_coverage_5": -0.012428297474980355, "rewards/true_frontier_ece_gap_only_reward": -0.009273872710764408, "signal/accuracy_reward/centered_abs_mean": 0.15275607407093048, "signal/accuracy_reward/group_std_mean": 0.2017911434173584, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07637803703546524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07637803703546524, "signal/advantage_abs_mean": 0.08974921703338623, "signal/advantage_pre_scale_abs_mean": 0.08974921703338623, "signal/advantage_pre_scale_std": 0.16027459800243377, "signal/advantage_std": 0.16027459800243377, "signal/brier_reward/centered_abs_mean": 0.09923952370882035, "signal/brier_reward/group_std_mean": 0.13173434436321257, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012404940463602543, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012404940463602543, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03722200207412243, "signal/confidence_uniqueness_reward/group_std_mean": 0.05656850188970566, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046527502592653034, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046527502592653034, "signal/format_reward/centered_abs_mean": 0.014653862826526165, "signal/format_reward/group_std_mean": 0.0300372663885355, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007326931413263083, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007326931413263083, "signal/frontier_aurc_reward/centered_abs_mean": 0.001311829499900341, "signal/frontier_aurc_reward/group_std_mean": 0.0018943335162475705, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0497335935942828e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0497335935942828e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_0/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_1/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_1/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_10/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_10/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_15/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_15/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_20/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_20/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_25/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_25/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_5/centered_abs_mean": 0.07780194133520127, "signal/frontier_coverage_5/group_std_mean": 0.10721372663974763, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012156553333625198, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012156553333625198, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.009020444191992282, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.012560129538178444, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0011275555239990353, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0011275555239990353, "step": 95 }, { "calibration/aurc": 0.18912161303723035, "calibration/batch_distribution_entropy": 0.7509673473140573, "calibration/batch_entropy_100bins": 0.8107610890298, "calibration/batch_entropy_10bins": 0.7509673473140573, "calibration/batch_entropy_50bins": 0.8259464172988149, "calibration/batch_uniqueness": 0.9409531377801335, "calibration/buffer_distribution_entropy": 0.8153438800401813, "calibration/buffer_entropy_100bins": 0.7838405798633927, "calibration/buffer_entropy_10bins": 0.8153438800401813, "calibration/buffer_entropy_50bins": 0.8251483429084174, "calibration/confidence_entropy": 0.5673696735876815, "calibration/coverage@0%": 0.005273188206117388, "calibration/coverage@1%": 0.005273188206117388, "calibration/coverage@10%": 0.14509694130614492, "calibration/coverage@15%": 0.41793198434036133, "calibration/coverage@20%": 0.6852799572454381, "calibration/coverage@25%": 0.8459034433939087, "calibration/coverage@30%": 0.9333465402575349, "calibration/coverage@5%": 0.005273188206117388, "calibration/distribution_entropy_10": 0.7509673473140573, "calibration/distribution_entropy_100": 0.8107610890298, "calibration/ece": 0.08220890696497, "calibration/mean_confidence": 0.6813237522023581, "calibration/unique_confidence_per_question": 0.17552083333333335, "calibration/unique_confidences": 67.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012934027777777768, "completions/max_length": 3705.4, "completions/max_terminated_length": 3705.4, "completions/mean_length": 671.973193359375, "completions/mean_terminated_length": 680.76796875, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.23999700003749952, "grad_norm": 0.0004094898758921772, "learning_rate": 3.2530120481927713e-06, "loss": -0.0104, "num_tokens": 208380890.0, "reward": 1.048251223564148, "reward_std": 0.13225021511316298, "rewards/accuracy_reward": 0.6796875, "rewards/brier_reward": 0.8040428400039673, "rewards/confidence_uniqueness_reward": 0.9271102786064148, "rewards/format_reward": 0.9870659828186035, "rewards/frontier_aurc_reward": -0.0017124064732342958, "rewards/frontier_coverage_0": -0.0011624779552221298, "rewards/frontier_coverage_1": -0.0011624779552221298, "rewards/frontier_coverage_10": -0.0011624779552221298, "rewards/frontier_coverage_15": -0.0011624779552221298, "rewards/frontier_coverage_20": -0.0011624779552221298, "rewards/frontier_coverage_25": -0.0011624779552221298, "rewards/frontier_coverage_5": -0.0011624779552221298, "rewards/true_frontier_ece_gap_only_reward": -0.010926000401377678, "signal/accuracy_reward/centered_abs_mean": 0.16735026240348816, "signal/accuracy_reward/group_std_mean": 0.21467447876930237, "signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08367513120174408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08367513120174408, "signal/advantage_abs_mean": 0.09893292784690857, "signal/advantage_pre_scale_abs_mean": 0.09893292784690857, "signal/advantage_pre_scale_std": 0.17071661055088044, "signal/advantage_std": 0.17071661055088044, "signal/brier_reward/centered_abs_mean": 0.1086144745349884, "signal/brier_reward/group_std_mean": 0.141890849173069, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01357680931687355, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01357680931687355, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04100620374083519, "signal/confidence_uniqueness_reward/group_std_mean": 0.060381069034338, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005125775467604399, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005125775467604399, "signal/format_reward/centered_abs_mean": 0.020198567770421505, "signal/format_reward/group_std_mean": 0.03544421307742596, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010099283885210752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010099283885210752, "signal/frontier_aurc_reward/centered_abs_mean": 0.001432186597958207, "signal/frontier_aurc_reward/group_std_mean": 0.002116669714450836, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2377915593096985e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2377915593096985e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_0/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_1/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_1/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_10/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_10/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_15/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_15/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_20/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_20/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_25/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_25/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_5/centered_abs_mean": 0.09051385223865509, "signal/frontier_coverage_5/group_std_mean": 0.12151473313570023, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014142789412289857, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014142789412289857, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.00986800417304039, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.014217101410031319, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0012335005216300488, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0012335005216300488, "step": 100 }, { "epoch": 0.23999700003749952, "eval_completions/clipped_ratio": 0.013020833333333334, "eval_completions/max_length": 2601.6666666666665, "eval_completions/max_terminated_length": 2601.6666666666665, "eval_completions/mean_length": 658.6519266764323, "eval_completions/mean_terminated_length": 667.3694864908854, "eval_completions/min_length": 49.166666666666664, "eval_completions/min_terminated_length": 235.33333333333334, "eval_loss": 0.0, "eval_num_tokens": 208380890.0, "eval_reward": 1.0273559093475342, "eval_reward_std": 0.2644694770375888, "eval_rewards/accuracy_reward": 0.65625, "eval_rewards/brier_reward": 0.7910055716832479, "eval_rewards/confidence_uniqueness_reward": 0.8701359728972117, "eval_rewards/format_reward": 0.9861111144224802, "eval_rewards/frontier_aurc_reward": -0.0019449660709748666, "eval_rewards/frontier_coverage_0": 9.137632635732491e-05, "eval_rewards/frontier_coverage_1": 9.137632635732491e-05, "eval_rewards/frontier_coverage_10": 9.137632635732491e-05, "eval_rewards/frontier_coverage_15": 9.137632635732491e-05, "eval_rewards/frontier_coverage_20": 9.137632635732491e-05, "eval_rewards/frontier_coverage_25": 9.137632635732491e-05, "eval_rewards/frontier_coverage_5": 9.137632635732491e-05, "eval_rewards/true_frontier_ece_gap_only_reward": -0.011575784999877214, "eval_runtime": 208.6217, "eval_samples_per_second": 4.793, "eval_signal/accuracy_reward/centered_abs_mean": 0.4365234325329463, "eval_signal/accuracy_reward/group_std_mean": 0.47344937423865, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21826171626647314, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21826171626647314, "eval_signal/advantage_abs_mean": 0.23294218629598618, "eval_signal/advantage_pre_scale_abs_mean": 0.23294218629598618, "eval_signal/advantage_pre_scale_std": 0.26374371101458866, "eval_signal/advantage_std": 0.26374371101458866, "eval_signal/brier_reward/centered_abs_mean": 0.17583947877089182, "eval_signal/brier_reward/group_std_mean": 0.2251211479306221, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021979934846361477, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021979934846361477, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06556083882848422, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09988817572593689, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008195104853560528, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008195104853560528, "eval_signal/format_reward/centered_abs_mean": 0.026258680348594982, "eval_signal/format_reward/group_std_mean": 0.0630940409998099, "eval_signal/format_reward/group_zero_std_frac": 0.6944444676240286, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013129340174297491, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013129340174297491, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0024230304018904767, "eval_signal/frontier_aurc_reward/group_std_mean": 0.003932161644722025, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.78598500295387e-05, "eval_signal/frontier_aurc_reward/weight": 0.015625, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.78598500295387e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_0/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_0/weight": 0.015625, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_1/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_1/weight": 0.015625, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_10/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_10/weight": 0.015625, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_15/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_15/weight": 0.015625, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_20/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_20/weight": 0.015625, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_25/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_25/weight": 0.015625, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.12514647220571837, "eval_signal/frontier_coverage_5/group_std_mean": 0.1818079153696696, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/frontier_coverage_5/weight": 0.015625, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019554136282143495, "eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.01118487554291884, "eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.017368461936712265, "eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.001398109442864855, "eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.001398109442864855, "eval_steps_per_second": 0.029, "step": 100 }, { "epoch": 0.23999700003749952, "step": 100, "train_probe_completions/clipped_ratio": 0.017187500000000022, "train_probe_completions/max_length": 2913.6666666666665, "train_probe_completions/max_terminated_length": 2913.6666666666665, "train_probe_completions/mean_length": 669.9316202799479, "train_probe_completions/mean_terminated_length": 681.6310424804688, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 213.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 208380890.0, "train_probe_reward": 1.0431965788205464, "train_probe_reward_std": 0.2604084312915802, "train_probe_rewards/accuracy_reward": 0.6848958233992258, "train_probe_rewards/brier_reward": 0.8070287605126699, "train_probe_rewards/confidence_uniqueness_reward": 0.8711295028527578, "train_probe_rewards/format_reward": 0.9852430621782938, "train_probe_rewards/frontier_aurc_reward": -0.001599111206208666, "train_probe_rewards/frontier_coverage_0": -0.00111961656754526, "train_probe_rewards/frontier_coverage_1": -0.00111961656754526, "train_probe_rewards/frontier_coverage_10": -0.00111961656754526, "train_probe_rewards/frontier_coverage_15": -0.00111961656754526, "train_probe_rewards/frontier_coverage_20": -0.00111961656754526, "train_probe_rewards/frontier_coverage_25": -0.00111961656754526, "train_probe_rewards/frontier_coverage_5": -0.00111961656754526, "train_probe_rewards/true_frontier_ece_gap_only_reward": -0.011961817430953184, "train_probe_runtime": 210.4634, "train_probe_samples_per_second": 4.751, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4185655365387599, "train_probe_signal/accuracy_reward/group_std_mean": 0.4635271529356639, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20928276826937994, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20928276826937994, "train_probe_signal/advantage_abs_mean": 0.22382708390553793, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22382708390553793, "train_probe_signal/advantage_pre_scale_std": 0.2600322514772415, "train_probe_signal/advantage_std": 0.2600322514772415, "train_probe_signal/brier_reward/centered_abs_mean": 0.16734372824430466, "train_probe_signal/brier_reward/group_std_mean": 0.21692882478237152, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020917966030538082, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.020917966030538082, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06453707938392957, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.10520645851890247, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008067134922991196, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008067134922991196, "train_probe_signal/format_reward/centered_abs_mean": 0.02783203125, "train_probe_signal/format_reward/group_std_mean": 0.0701802521944046, "train_probe_signal/format_reward/group_zero_std_frac": 0.6388889104127884, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013916015625, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.013916015625, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002024289375791947, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0032714407813424864, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.162952149674917e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.015625, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.162952149674917e-05, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_0/weight": 0.015625, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_1/weight": 0.015625, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_10/weight": 0.015625, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_15/weight": 0.015625, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_20/weight": 0.015625, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_25/weight": 0.015625, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.12542071690162024, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.18846788754065832, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/frontier_coverage_5/weight": 0.015625, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019596987015878162, "train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.011922950390726328, "train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.018872848711907864, "train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.001490368798840791, "train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.001490368798840791, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.3267044793348773, "calibration/batch_distribution_entropy": 0.7514441882328864, "calibration/batch_entropy_100bins": 0.8095879565010626, "calibration/batch_entropy_10bins": 0.7514441882328864, "calibration/batch_entropy_50bins": 0.8265329826402684, "calibration/batch_uniqueness": 0.9404129011408552, "calibration/buffer_distribution_entropy": 0.8180824212746325, "calibration/buffer_entropy_100bins": 0.7960694640163056, "calibration/buffer_entropy_10bins": 0.8180824212746325, "calibration/buffer_entropy_50bins": 0.8335788091827643, "calibration/confidence_entropy": 0.5574560509219397, "calibration/coverage@0%": 0.00994418656056587, "calibration/coverage@1%": 0.00994418656056587, "calibration/coverage@10%": 0.14097866931918657, "calibration/coverage@15%": 0.1568937886825818, "calibration/coverage@20%": 0.2546232650242107, "calibration/coverage@25%": 0.3336462535299578, "calibration/coverage@30%": 0.389026431209603, "calibration/coverage@5%": 0.10649591069849691, "calibration/distribution_entropy_10": 0.7514441882328864, "calibration/distribution_entropy_100": 0.8095879565010626, "calibration/ece": 0.14820278446655621, "calibration/mean_confidence": 0.6889263632869203, "calibration/unique_confidence_per_question": 0.175, "calibration/unique_confidences": 67.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012065972222222231, "completions/max_length": 3604.2, "completions/max_terminated_length": 3604.2, "completions/mean_length": 673.2842163085937, "completions/mean_terminated_length": 681.5394897460938, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.2519968500393745, "grad_norm": 0.0004055156314279884, "learning_rate": 3.1024096385542172e-06, "loss": -0.0102, "num_tokens": 219213988.0, "reward": 1.044841742515564, "reward_std": 0.12685696184635162, "rewards/accuracy_reward": 0.6730902671813965, "rewards/brier_reward": 0.7991919994354248, "rewards/confidence_uniqueness_reward": 0.9291059017181397, "rewards/format_reward": 0.9876736044883728, "rewards/frontier_aurc_reward": -0.001739606261253357, "rewards/frontier_coverage_0": -0.001969197951257229, "rewards/frontier_coverage_1": -0.001969197951257229, "rewards/frontier_coverage_10": -0.001969197951257229, "rewards/frontier_coverage_15": -0.001969197951257229, "rewards/frontier_coverage_20": -0.001969197951257229, "rewards/frontier_coverage_25": -0.001969197951257229, "rewards/frontier_coverage_5": -0.001969197951257229, "rewards/true_frontier_ece_gap_only_reward": -0.010678962059319019, "signal/accuracy_reward/centered_abs_mean": 0.15438368171453476, "signal/accuracy_reward/group_std_mean": 0.20449974834918977, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07719184085726738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07719184085726738, "signal/advantage_abs_mean": 0.09131217449903488, "signal/advantage_pre_scale_abs_mean": 0.09131217449903488, "signal/advantage_pre_scale_std": 0.16192201673984527, "signal/advantage_std": 0.16192201673984527, "signal/brier_reward/centered_abs_mean": 0.1098570004105568, "signal/brier_reward/group_std_mean": 0.14484120011329651, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0137321250513196, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0137321250513196, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04084142223000527, "signal/confidence_uniqueness_reward/group_std_mean": 0.06246491596102714, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005105177778750658, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005105177778750658, "signal/format_reward/centered_abs_mean": 0.01923828125, "signal/format_reward/group_std_mean": 0.03651031218469143, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009619140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009619140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014394932892173528, "signal/frontier_aurc_reward/group_std_mean": 0.0021645855624228714, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2492082644021137e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2492082644021137e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_0/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_1/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_1/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_10/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_10/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_15/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_15/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_20/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_20/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_25/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_25/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_5/centered_abs_mean": 0.0979076936841011, "signal/frontier_coverage_5/group_std_mean": 0.13328861594200134, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015298077138140797, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015298077138140797, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.009452897682785988, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.014724508672952653, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0011816122103482484, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0011816122103482484, "step": 105 }, { "calibration/aurc": 0.19653911867283705, "calibration/batch_distribution_entropy": 0.742189294691922, "calibration/batch_entropy_100bins": 0.8103696123897912, "calibration/batch_entropy_10bins": 0.742189294691922, "calibration/batch_entropy_50bins": 0.8205309171620865, "calibration/batch_uniqueness": 0.9383345760593518, "calibration/buffer_distribution_entropy": 0.8196634901938914, "calibration/buffer_entropy_100bins": 0.8046767967346928, "calibration/buffer_entropy_10bins": 0.8196634901938914, "calibration/buffer_entropy_50bins": 0.8391647012059913, "calibration/confidence_entropy": 0.5437583638987209, "calibration/coverage@0%": 0.017870423903897183, "calibration/coverage@1%": 0.017870423903897183, "calibration/coverage@10%": 0.14836196586429348, "calibration/coverage@15%": 0.2993953758400066, "calibration/coverage@20%": 0.5307628916308488, "calibration/coverage@25%": 0.7679343830030556, "calibration/coverage@30%": 0.9234316719507646, "calibration/coverage@5%": 0.04352487416567728, "calibration/distribution_entropy_10": 0.742189294691922, "calibration/distribution_entropy_100": 0.8103696123897912, "calibration/ece": 0.09015899744059616, "calibration/mean_confidence": 0.6941812575865814, "calibration/unique_confidence_per_question": 0.19114583333333332, "calibration/unique_confidences": 73.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009982638888888905, "completions/max_length": 3548.6, "completions/max_terminated_length": 3548.6, "completions/mean_length": 683.9628540039063, "completions/mean_terminated_length": 690.8879516601562, "completions/min_length": 0.0, "completions/min_terminated_length": 193.6, "epoch": 0.2639967000412495, "grad_norm": 0.0004070218128617853, "learning_rate": 2.9518072289156627e-06, "loss": -0.0086, "num_tokens": 230201688.0, "reward": 1.0639292001724243, "reward_std": 0.12081557959318161, "rewards/accuracy_reward": 0.7053819298744202, "rewards/brier_reward": 0.8154799818992615, "rewards/confidence_uniqueness_reward": 0.929580807685852, "rewards/format_reward": 0.989843738079071, "rewards/frontier_aurc_reward": -0.001527873962186277, "rewards/frontier_coverage_0": -0.005524499481543899, "rewards/frontier_coverage_1": -0.005524499481543899, "rewards/frontier_coverage_10": -0.005524499481543899, "rewards/frontier_coverage_15": -0.005524499481543899, "rewards/frontier_coverage_20": -0.005524499481543899, "rewards/frontier_coverage_25": -0.005524499481543899, "rewards/frontier_coverage_5": -0.005524499481543899, "rewards/true_frontier_ece_gap_only_reward": -0.009504916891455651, "signal/accuracy_reward/centered_abs_mean": 0.14922960102558136, "signal/accuracy_reward/group_std_mean": 0.20320949256420134, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07461480051279068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07461480051279068, "signal/advantage_abs_mean": 0.0867169290781021, "signal/advantage_pre_scale_abs_mean": 0.0867169290781021, "signal/advantage_pre_scale_std": 0.158852681517601, "signal/advantage_std": 0.158852681517601, "signal/brier_reward/centered_abs_mean": 0.10190331041812897, "signal/brier_reward/group_std_mean": 0.13416367769241333, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012737913802266122, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012737913802266122, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03820802196860314, "signal/confidence_uniqueness_reward/group_std_mean": 0.05503500029444695, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004776002746075392, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004776002746075392, "signal/format_reward/centered_abs_mean": 0.01648763045668602, "signal/format_reward/group_std_mean": 0.029129663482308388, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00824381522834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00824381522834301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013106558239087463, "signal/frontier_aurc_reward/group_std_mean": 0.001980750821530819, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.047899724857416e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.047899724857416e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_0/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_1/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_1/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_10/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_10/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_15/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_15/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_20/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_20/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_25/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_25/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_5/centered_abs_mean": 0.09209920465946198, "signal/frontier_coverage_5/group_std_mean": 0.12662244141101836, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014390500728040934, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014390500728040934, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.008595239371061325, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.01369424220174551, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0010744049213826656, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0010744049213826656, "step": 110 }, { "calibration/aurc": 0.30524022173226106, "calibration/batch_distribution_entropy": 0.7818711531082017, "calibration/batch_entropy_100bins": 0.8248857687848549, "calibration/batch_entropy_10bins": 0.7818711531082017, "calibration/batch_entropy_50bins": 0.8433798093291898, "calibration/batch_uniqueness": 0.9433997785726878, "calibration/buffer_distribution_entropy": 0.8217132367095644, "calibration/buffer_entropy_100bins": 0.8122731904636383, "calibration/buffer_entropy_10bins": 0.8217132367095644, "calibration/buffer_entropy_50bins": 0.8442563490493079, "calibration/confidence_entropy": 0.5560049283881041, "calibration/coverage@0%": 0.006447631273640926, "calibration/coverage@1%": 0.006447631273640926, "calibration/coverage@10%": 0.006447631273640926, "calibration/coverage@15%": 0.14715146010246977, "calibration/coverage@20%": 0.4041453376318914, "calibration/coverage@25%": 0.5688995513801839, "calibration/coverage@30%": 0.6564715896999361, "calibration/coverage@5%": 0.006447631273640926, "calibration/distribution_entropy_10": 0.7818711531082017, "calibration/distribution_entropy_100": 0.8248857687848549, "calibration/ece": 0.15221433813065566, "calibration/mean_confidence": 0.6707838972409772, "calibration/unique_confidence_per_question": 0.18958333333333335, "calibration/unique_confidences": 72.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017708333333333326, "completions/max_length": 3419.2, "completions/max_terminated_length": 3419.2, "completions/mean_length": 679.98916015625, "completions/mean_terminated_length": 692.3475830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 207.8, "epoch": 0.27599655004312446, "grad_norm": 0.00038507970748469234, "learning_rate": 2.8012048192771087e-06, "loss": -0.0156, "num_tokens": 241114363.0, "reward": 1.0363726139068603, "reward_std": 0.13486612737178802, "rewards/accuracy_reward": 0.6633680582046508, "rewards/brier_reward": 0.793210256099701, "rewards/confidence_uniqueness_reward": 0.9223326444625854, "rewards/format_reward": 0.9822916626930237, "rewards/frontier_aurc_reward": -0.0019153600791469217, "rewards/frontier_coverage_0": 0.003437680657953024, "rewards/frontier_coverage_1": 0.003437680657953024, "rewards/frontier_coverage_10": 0.003437680657953024, "rewards/frontier_coverage_15": 0.003437680657953024, "rewards/frontier_coverage_20": 0.003437680657953024, "rewards/frontier_coverage_25": 0.003437680657953024, "rewards/frontier_coverage_5": 0.003437680657953024, "rewards/true_frontier_ece_gap_only_reward": -0.009969686530530453, "signal/accuracy_reward/centered_abs_mean": 0.16082899123430253, "signal/accuracy_reward/group_std_mean": 0.2069980025291443, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08041449561715126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08041449561715126, "signal/advantage_abs_mean": 0.09912077933549882, "signal/advantage_pre_scale_abs_mean": 0.09912077933549882, "signal/advantage_pre_scale_std": 0.1765537291765213, "signal/advantage_std": 0.1765537291765213, "signal/brier_reward/centered_abs_mean": 0.11570018827915192, "signal/brier_reward/group_std_mean": 0.151495760679245, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446252353489399, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01446252353489399, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04762213602662087, "signal/confidence_uniqueness_reward/group_std_mean": 0.07307658642530442, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059527670033276085, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059527670033276085, "signal/format_reward/centered_abs_mean": 0.02702907994389534, "signal/format_reward/group_std_mean": 0.04844924733042717, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01351453997194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01351453997194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016357111278921365, "signal/frontier_aurc_reward/group_std_mean": 0.002513893973082304, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5557986373314633e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5557986373314633e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_0/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_1/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_1/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_10/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_10/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_15/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_15/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_20/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_20/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_25/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_25/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_5/centered_abs_mean": 0.09530313909053803, "signal/frontier_coverage_5/group_std_mean": 0.12956467568874358, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014891115482896567, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014891115482896567, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.00945689920336008, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.01578503046184778, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00118211240042001, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00118211240042001, "step": 115 }, { "calibration/aurc": 0.2829972784176247, "calibration/batch_distribution_entropy": 0.7978632783074413, "calibration/batch_entropy_100bins": 0.8326413576750176, "calibration/batch_entropy_10bins": 0.7978632783074413, "calibration/batch_entropy_50bins": 0.8484370752003694, "calibration/batch_uniqueness": 0.9428754810521134, "calibration/buffer_distribution_entropy": 0.8256495412935981, "calibration/buffer_entropy_100bins": 0.8198335167796709, "calibration/buffer_entropy_10bins": 0.8256495412935981, "calibration/buffer_entropy_50bins": 0.8499243478716128, "calibration/confidence_entropy": 0.5454490383451711, "calibration/coverage@0%": 0.024511889179755674, "calibration/coverage@1%": 0.024511889179755674, "calibration/coverage@10%": 0.18489583333333334, "calibration/coverage@15%": 0.3580170157068063, "calibration/coverage@20%": 0.47328206806282724, "calibration/coverage@25%": 0.5411131108202444, "calibration/coverage@30%": 0.5953125, "calibration/coverage@5%": 0.04899105584642234, "calibration/distribution_entropy_10": 0.7978632783074413, "calibration/distribution_entropy_100": 0.8326413576750176, "calibration/ece": 0.14527504044433004, "calibration/mean_confidence": 0.6648941085090072, "calibration/unique_confidence_per_question": 0.2046875, "calibration/unique_confidences": 78.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011024305555555558, "completions/max_length": 3318.6, "completions/max_terminated_length": 3318.6, "completions/mean_length": 686.3271728515625, "completions/mean_terminated_length": 693.9168334960938, "completions/min_length": 0.0, "completions/min_terminated_length": 229.2, "epoch": 0.28799640004499943, "grad_norm": 0.0003715228522196412, "learning_rate": 2.6506024096385547e-06, "loss": -0.009, "num_tokens": 252102708.0, "reward": 1.048642134666443, "reward_std": 0.12622617483139037, "rewards/accuracy_reward": 0.6764756917953492, "rewards/brier_reward": 0.8032851457595825, "rewards/confidence_uniqueness_reward": 0.9316941499710083, "rewards/format_reward": 0.9888888835906983, "rewards/frontier_aurc_reward": -0.001726908260025084, "rewards/frontier_coverage_0": 0.0026118648587726057, "rewards/frontier_coverage_1": 0.0026118648587726057, "rewards/frontier_coverage_10": 0.0026118648587726057, "rewards/frontier_coverage_15": 0.0026118648587726057, "rewards/frontier_coverage_20": 0.0026118648587726057, "rewards/frontier_coverage_25": 0.0026118648587726057, "rewards/frontier_coverage_5": 0.0026118648587726057, "rewards/true_frontier_ece_gap_only_reward": -0.009370057098567485, "signal/accuracy_reward/centered_abs_mean": 0.16347113847732545, "signal/accuracy_reward/group_std_mean": 0.21460457444190978, "signal/accuracy_reward/group_zero_std_frac": 0.38888890147209165, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08173556923866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08173556923866272, "signal/advantage_abs_mean": 0.09389262199401856, "signal/advantage_pre_scale_abs_mean": 0.09389262199401856, "signal/advantage_pre_scale_std": 0.16233381628990173, "signal/advantage_std": 0.16233381628990173, "signal/brier_reward/centered_abs_mean": 0.10937037020921707, "signal/brier_reward/group_std_mean": 0.14254879355430602, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013671296276152134, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013671296276152134, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03843596838414669, "signal/confidence_uniqueness_reward/group_std_mean": 0.057198996841907504, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004804496048018336, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004804496048018336, "signal/format_reward/centered_abs_mean": 0.017404513992369176, "signal/format_reward/group_std_mean": 0.032134901732206345, "signal/format_reward/group_zero_std_frac": 0.8666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008702256996184588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008702256996184588, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014132563257589937, "signal/frontier_aurc_reward/group_std_mean": 0.0021206842735409736, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2082130089984276e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2082130089984276e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_0/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_1/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_1/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_10/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_10/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_15/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_15/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_20/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_20/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_25/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_25/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_5/centered_abs_mean": 0.10615848153829574, "signal/frontier_coverage_5/group_std_mean": 0.14436171054840088, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001658726274035871, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001658726274035871, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.008557920716702938, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.014681273698806762, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0010697400895878673, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0010697400895878673, "step": 120 }, { "calibration/aurc": 0.19671941465770884, "calibration/batch_distribution_entropy": 0.714069251213561, "calibration/batch_entropy_100bins": 0.79093280229107, "calibration/batch_entropy_10bins": 0.714069251213561, "calibration/batch_entropy_50bins": 0.7976220273158751, "calibration/batch_uniqueness": 0.9272707791642439, "calibration/buffer_distribution_entropy": 0.8283623145695502, "calibration/buffer_entropy_100bins": 0.8261738795237367, "calibration/buffer_entropy_10bins": 0.8283623145695502, "calibration/buffer_entropy_50bins": 0.8545030151086038, "calibration/confidence_entropy": 0.5104855330872758, "calibration/coverage@0%": 0.014166491343532641, "calibration/coverage@1%": 0.014166491343532641, "calibration/coverage@10%": 0.31885399134353265, "calibration/coverage@15%": 0.3815390808752131, "calibration/coverage@20%": 0.4841432475418797, "calibration/coverage@25%": 0.8064810181594071, "calibration/coverage@30%": 0.9008741303877912, "calibration/coverage@5%": 0.014166491343532641, "calibration/distribution_entropy_10": 0.714069251213561, "calibration/distribution_entropy_100": 0.79093280229107, "calibration/ece": 0.09618043935237061, "calibration/mean_confidence": 0.7132224626554917, "calibration/unique_confidence_per_question": 0.1828125, "calibration/unique_confidences": 70.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012239583333333304, "completions/max_length": 3284.4, "completions/max_terminated_length": 3284.4, "completions/mean_length": 691.3783081054687, "completions/mean_terminated_length": 700.0879638671875, "completions/min_length": 0.0, "completions/min_terminated_length": 221.8, "epoch": 0.2999962500468744, "grad_norm": 0.00044733521644957364, "learning_rate": 2.5e-06, "loss": -0.0113, "num_tokens": 263185050.0, "reward": 1.055611777305603, "reward_std": 0.1275094196200371, "rewards/accuracy_reward": 0.6895833253860474, "rewards/brier_reward": 0.8160496830940247, "rewards/confidence_uniqueness_reward": 0.92153559923172, "rewards/format_reward": 0.9876736164093017, "rewards/frontier_aurc_reward": -0.0017230862518772482, "rewards/frontier_coverage_0": 0.01225762339308858, "rewards/frontier_coverage_1": 0.01225762339308858, "rewards/frontier_coverage_10": 0.01225762339308858, "rewards/frontier_coverage_15": 0.01225762339308858, "rewards/frontier_coverage_20": 0.01225762339308858, "rewards/frontier_coverage_25": 0.01225762339308858, "rewards/frontier_coverage_5": 0.01225762339308858, "rewards/true_frontier_ece_gap_only_reward": -0.012229060940444469, "signal/accuracy_reward/centered_abs_mean": 0.16138237714767456, "signal/accuracy_reward/group_std_mean": 0.21225160956382752, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08069118857383728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08069118857383728, "signal/advantage_abs_mean": 0.0945181205868721, "signal/advantage_pre_scale_abs_mean": 0.0945181205868721, "signal/advantage_pre_scale_std": 0.1663988560438156, "signal/advantage_std": 0.1663988560438156, "signal/brier_reward/centered_abs_mean": 0.11373110711574555, "signal/brier_reward/group_std_mean": 0.1483635872602463, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014216388389468194, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014216388389468194, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04427947551012039, "signal/confidence_uniqueness_reward/group_std_mean": 0.062184395641088484, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005534934438765049, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005534934438765049, "signal/format_reward/centered_abs_mean": 0.018576388712972403, "signal/format_reward/group_std_mean": 0.031818334758281705, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009288194356486201, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009288194356486201, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016881852876394986, "signal/frontier_aurc_reward/group_std_mean": 0.0025606358423829077, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6377895119367166e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6377895119367166e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_0/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_1/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_1/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_10/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_10/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_15/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_15/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_20/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_20/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_25/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_25/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_5/centered_abs_mean": 0.0979716956615448, "signal/frontier_coverage_5/group_std_mean": 0.1332421526312828, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015308077447116375, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015308077447116375, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.01217461358755827, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.02020731884986162, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0015218266984447838, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0015218266984447838, "step": 125 }, { "calibration/aurc": 0.26231922913161954, "calibration/batch_distribution_entropy": 0.7849330143793635, "calibration/batch_entropy_100bins": 0.8313458268638213, "calibration/batch_entropy_10bins": 0.7849330143793635, "calibration/batch_entropy_50bins": 0.8469249993347873, "calibration/batch_uniqueness": 0.9375108076362693, "calibration/buffer_distribution_entropy": 0.8294780083702189, "calibration/buffer_entropy_100bins": 0.8315937057293505, "calibration/buffer_entropy_10bins": 0.8294780083702189, "calibration/buffer_entropy_50bins": 0.8582865661934059, "calibration/confidence_entropy": 0.5124842329428232, "calibration/coverage@0%": 0.012623205773998373, "calibration/coverage@1%": 0.012623205773998373, "calibration/coverage@10%": 0.1614250792927287, "calibration/coverage@15%": 0.2572956123196314, "calibration/coverage@20%": 0.3132771916457143, "calibration/coverage@25%": 0.43678116869105654, "calibration/coverage@30%": 0.8075378762952686, "calibration/coverage@5%": 0.022596959054838266, "calibration/distribution_entropy_10": 0.7849330143793635, "calibration/distribution_entropy_100": 0.8313458268638213, "calibration/ece": 0.12743665254843256, "calibration/mean_confidence": 0.6790373502611695, "calibration/unique_confidence_per_question": 0.21770833333333334, "calibration/unique_confidences": 83.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017795138888888885, "completions/max_length": 3700.6, "completions/max_terminated_length": 3700.6, "completions/mean_length": 712.4034790039062, "completions/mean_terminated_length": 725.374267578125, "completions/min_length": 0.0, "completions/min_terminated_length": 191.2, "epoch": 0.3119961000487494, "grad_norm": 0.00043796157115139067, "learning_rate": 2.349397590361446e-06, "loss": -0.0143, "num_tokens": 274516738.0, "reward": 1.0368208050727845, "reward_std": 0.13528352975845337, "rewards/accuracy_reward": 0.6607638716697692, "rewards/brier_reward": 0.8013606786727905, "rewards/confidence_uniqueness_reward": 0.9176180601119995, "rewards/format_reward": 0.9821180462837219, "rewards/frontier_aurc_reward": -0.001984483632259071, "rewards/frontier_coverage_0": 0.01965160174295306, "rewards/frontier_coverage_1": 0.01965160174295306, "rewards/frontier_coverage_10": 0.01965160174295306, "rewards/frontier_coverage_15": 0.01965160174295306, "rewards/frontier_coverage_20": 0.01965160174295306, "rewards/frontier_coverage_25": 0.01965160174295306, "rewards/frontier_coverage_5": 0.01965160174295306, "rewards/true_frontier_ece_gap_only_reward": -0.012887386418879033, "signal/accuracy_reward/centered_abs_mean": 0.1666666716337204, "signal/accuracy_reward/group_std_mean": 0.21933417916297912, "signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0833333358168602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0833333358168602, "signal/advantage_abs_mean": 0.09987292736768723, "signal/advantage_pre_scale_abs_mean": 0.09987292736768723, "signal/advantage_pre_scale_std": 0.17289304733276367, "signal/advantage_std": 0.17289304733276367, "signal/brier_reward/centered_abs_mean": 0.11932021975517274, "signal/brier_reward/group_std_mean": 0.15680161118507385, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014915027469396592, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014915027469396592, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.049394051730632785, "signal/confidence_uniqueness_reward/group_std_mean": 0.07107506543397904, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006174256466329098, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006174256466329098, "signal/format_reward/centered_abs_mean": 0.02468533031642437, "signal/format_reward/group_std_mean": 0.04145882315933704, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012342665158212186, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012342665158212186, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018592241685837507, "signal/frontier_aurc_reward/group_std_mean": 0.002899319725111127, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9050377634121105e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9050377634121105e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_0/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_1/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_1/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_10/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_10/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_15/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_15/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_20/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_20/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_25/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_25/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_5/centered_abs_mean": 0.10627900362014771, "signal/frontier_coverage_5/group_std_mean": 0.14542074501514435, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001660609431564808, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001660609431564808, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.013442159257829189, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.02378322519361973, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0016802699072286486, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0016802699072286486, "step": 130 }, { "calibration/aurc": 0.2100724966240294, "calibration/batch_distribution_entropy": 0.7932919126000467, "calibration/batch_entropy_100bins": 0.8307378530163175, "calibration/batch_entropy_10bins": 0.7932919126000467, "calibration/batch_entropy_50bins": 0.847089723931882, "calibration/batch_uniqueness": 0.9421351893583964, "calibration/buffer_distribution_entropy": 0.8324469338084997, "calibration/buffer_entropy_100bins": 0.8370055455965371, "calibration/buffer_entropy_10bins": 0.8324469338084997, "calibration/buffer_entropy_50bins": 0.862450153507045, "calibration/confidence_entropy": 0.5360752773461647, "calibration/coverage@0%": 0.019922239322602862, "calibration/coverage@1%": 0.019922239322602862, "calibration/coverage@10%": 0.3430041437433335, "calibration/coverage@15%": 0.35355823345309606, "calibration/coverage@20%": 0.4321721670970639, "calibration/coverage@25%": 0.6357222721233556, "calibration/coverage@30%": 0.7409831478414641, "calibration/coverage@5%": 0.30943497381008883, "calibration/distribution_entropy_10": 0.7932919126000467, "calibration/distribution_entropy_100": 0.8307378530163175, "calibration/ece": 0.13974870453897614, "calibration/mean_confidence": 0.6561501450334012, "calibration/unique_confidence_per_question": 0.19531250000000003, "calibration/unique_confidences": 75.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0109375, "completions/max_length": 3228.4, "completions/max_terminated_length": 3228.4, "completions/mean_length": 692.0186767578125, "completions/mean_terminated_length": 699.7394653320313, "completions/min_length": 0.0, "completions/min_terminated_length": 181.8, "epoch": 0.32399595005062437, "grad_norm": 0.0004282255540601909, "learning_rate": 2.1987951807228917e-06, "loss": -0.0093, "num_tokens": 285581817.0, "reward": 1.054863166809082, "reward_std": 0.12371634542942048, "rewards/accuracy_reward": 0.6837673544883728, "rewards/brier_reward": 0.8118860840797424, "rewards/confidence_uniqueness_reward": 0.9371923685073853, "rewards/format_reward": 0.9890625, "rewards/frontier_aurc_reward": -0.0013375790789723397, "rewards/frontier_coverage_0": 0.006732956040650606, "rewards/frontier_coverage_1": 0.006732956040650606, "rewards/frontier_coverage_10": 0.006732956040650606, "rewards/frontier_coverage_15": 0.006732956040650606, "rewards/frontier_coverage_20": 0.006732956040650606, "rewards/frontier_coverage_25": 0.006732956040650606, "rewards/frontier_coverage_5": 0.006732956040650606, "rewards/true_frontier_ece_gap_only_reward": -0.007217477634549141, "signal/accuracy_reward/centered_abs_mean": 0.15847981721162796, "signal/accuracy_reward/group_std_mean": 0.21153208017349243, "signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07923990860581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07923990860581398, "signal/advantage_abs_mean": 0.08863486796617508, "signal/advantage_pre_scale_abs_mean": 0.08863486796617508, "signal/advantage_pre_scale_std": 0.15808248221874238, "signal/advantage_std": 0.15808248221874238, "signal/brier_reward/centered_abs_mean": 0.11114487051963806, "signal/brier_reward/group_std_mean": 0.14610919654369353, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013893108814954758, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013893108814954758, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03625557161867619, "signal/confidence_uniqueness_reward/group_std_mean": 0.05811716765165329, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004531946452334523, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004531946452334523, "signal/format_reward/centered_abs_mean": 0.01872829869389534, "signal/format_reward/group_std_mean": 0.036873598024249075, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00936414934694767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00936414934694767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011374737368896604, "signal/frontier_aurc_reward/group_std_mean": 0.0018357637338340283, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7773027138900943e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7773027138900943e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_0/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_1/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_1/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_10/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_10/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_15/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_15/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_20/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_20/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_25/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_25/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_5/centered_abs_mean": 0.12717922925949096, "signal/frontier_coverage_5/group_std_mean": 0.17108558714389802, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019871754571795463, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019871754571795463, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.006792500615119934, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.012135511264204979, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0008490625768899918, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0008490625768899918, "step": 135 }, { "calibration/aurc": 0.12316983748625196, "calibration/batch_distribution_entropy": 0.8033321196483671, "calibration/batch_entropy_100bins": 0.840970506147876, "calibration/batch_entropy_10bins": 0.8033321196483671, "calibration/batch_entropy_50bins": 0.8595641216344057, "calibration/batch_uniqueness": 0.9441387779910293, "calibration/buffer_distribution_entropy": 0.8356524945448074, "calibration/buffer_entropy_100bins": 0.844583354188865, "calibration/buffer_entropy_10bins": 0.8356524945448074, "calibration/buffer_entropy_50bins": 0.8676924896189366, "calibration/confidence_entropy": 0.5414429455483087, "calibration/coverage@0%": 0.024367036493294764, "calibration/coverage@1%": 0.024367036493294764, "calibration/coverage@10%": 0.5015683248215262, "calibration/coverage@15%": 0.7200879000764464, "calibration/coverage@20%": 0.806558927378318, "calibration/coverage@25%": 0.887908015749872, "calibration/coverage@30%": 0.9627324607905436, "calibration/coverage@5%": 0.21789834997139393, "calibration/distribution_entropy_10": 0.8033321196483671, "calibration/distribution_entropy_100": 0.840970506147876, "calibration/ece": 0.12557287187845612, "calibration/mean_confidence": 0.6414325364057707, "calibration/unique_confidence_per_question": 0.20520833333333335, "calibration/unique_confidences": 78.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012847222222222232, "completions/max_length": 3215.0, "completions/max_terminated_length": 3215.0, "completions/mean_length": 675.0425415039062, "completions/mean_terminated_length": 683.766796875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.2, "epoch": 0.33599580005249935, "grad_norm": 0.0003816323878709227, "learning_rate": 2.0481927710843377e-06, "loss": -0.0113, "num_tokens": 296462531.0, "reward": 1.0525088548660277, "reward_std": 0.12087329030036927, "rewards/accuracy_reward": 0.681163203716278, "rewards/brier_reward": 0.8102995872497558, "rewards/confidence_uniqueness_reward": 0.9345194458961487, "rewards/format_reward": 0.9871527791023255, "rewards/frontier_aurc_reward": -0.0013744331081397831, "rewards/frontier_coverage_0": 0.00963379731401801, "rewards/frontier_coverage_1": 0.00963379731401801, "rewards/frontier_coverage_10": 0.00963379731401801, "rewards/frontier_coverage_15": 0.00963379731401801, "rewards/frontier_coverage_20": 0.00963379731401801, "rewards/frontier_coverage_25": 0.00963379731401801, "rewards/frontier_coverage_5": 0.00963379731401801, "rewards/true_frontier_ece_gap_only_reward": -0.006269952561706304, "signal/accuracy_reward/centered_abs_mean": 0.14478624165058135, "signal/accuracy_reward/group_std_mean": 0.19590498208999635, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07239312082529067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07239312082529067, "signal/advantage_abs_mean": 0.08584622740745544, "signal/advantage_pre_scale_abs_mean": 0.08584622740745544, "signal/advantage_pre_scale_std": 0.15582461655139923, "signal/advantage_std": 0.15582461655139923, "signal/brier_reward/centered_abs_mean": 0.11108436435461044, "signal/brier_reward/group_std_mean": 0.14652538895606995, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013885545544326305, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013885545544326305, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039488519355654714, "signal/confidence_uniqueness_reward/group_std_mean": 0.06021577715873718, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004936064919456839, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004936064919456839, "signal/format_reward/centered_abs_mean": 0.020616319216787814, "signal/format_reward/group_std_mean": 0.037392809987068176, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010308159608393907, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010308159608393907, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012093130266293884, "signal/frontier_aurc_reward/group_std_mean": 0.0018805687082931398, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8895516041084194e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8895516041084194e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_0/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_1/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_1/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_10/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_10/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_15/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_15/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_20/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_20/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_25/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_25/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_5/centered_abs_mean": 0.12452564984560013, "signal/frontier_coverage_5/group_std_mean": 0.17022224068641661, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001945713278837502, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001945713278837502, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0054856881499290465, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.00944354822859168, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0006857110187411308, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0006857110187411308, "step": 140 }, { "calibration/aurc": 0.1707190282167193, "calibration/batch_distribution_entropy": 0.7870297352600305, "calibration/batch_entropy_100bins": 0.8287951367704729, "calibration/batch_entropy_10bins": 0.7870297352600305, "calibration/batch_entropy_50bins": 0.8444396351033697, "calibration/batch_uniqueness": 0.9381528864738238, "calibration/buffer_distribution_entropy": 0.836454052730403, "calibration/buffer_entropy_100bins": 0.8535101399161015, "calibration/buffer_entropy_10bins": 0.836454052730403, "calibration/buffer_entropy_50bins": 0.8728050714670547, "calibration/confidence_entropy": 0.5240414918619417, "calibration/coverage@0%": 0.026351947388887843, "calibration/coverage@1%": 0.026351947388887843, "calibration/coverage@10%": 0.41923410430351626, "calibration/coverage@15%": 0.5314265164218459, "calibration/coverage@20%": 0.620989570820157, "calibration/coverage@25%": 0.751987830680877, "calibration/coverage@30%": 0.8425736230097444, "calibration/coverage@5%": 0.19100326031606285, "calibration/distribution_entropy_10": 0.7870297352600305, "calibration/distribution_entropy_100": 0.8287951367704729, "calibration/ece": 0.1261507501639957, "calibration/mean_confidence": 0.6589088571731689, "calibration/unique_confidence_per_question": 0.20416666666666666, "calibration/unique_confidences": 78.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01015625, "completions/max_length": 3217.4, "completions/max_terminated_length": 3217.4, "completions/mean_length": 651.4529541015625, "completions/mean_terminated_length": 658.1746948242187, "completions/min_length": 0.0, "completions/min_terminated_length": 206.8, "epoch": 0.34799565005437433, "grad_norm": 0.0004082749364897609, "learning_rate": 1.8975903614457832e-06, "loss": -0.0092, "num_tokens": 307031877.0, "reward": 1.0682907581329346, "reward_std": 0.11585188210010529, "rewards/accuracy_reward": 0.7100694179534912, "rewards/brier_reward": 0.8259410500526428, "rewards/confidence_uniqueness_reward": 0.9193386673927307, "rewards/format_reward": 0.9896701335906982, "rewards/frontier_aurc_reward": -0.0016615271219052375, "rewards/frontier_coverage_0": 0.010000471444800495, "rewards/frontier_coverage_1": 0.010000471444800495, "rewards/frontier_coverage_10": 0.010000471444800495, "rewards/frontier_coverage_15": 0.010000471444800495, "rewards/frontier_coverage_20": 0.010000471444800495, "rewards/frontier_coverage_25": 0.01015151059255004, "rewards/frontier_coverage_5": 0.010000471444800495, "rewards/true_frontier_ece_gap_only_reward": -0.006473575532436371, "signal/accuracy_reward/centered_abs_mean": 0.1404513895511627, "signal/accuracy_reward/group_std_mean": 0.19066681563854218, "signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07022569477558135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07022569477558135, "signal/advantage_abs_mean": 0.08215740621089936, "signal/advantage_pre_scale_abs_mean": 0.08215740621089936, "signal/advantage_pre_scale_std": 0.15350556373596191, "signal/advantage_std": 0.15350556373596191, "signal/brier_reward/centered_abs_mean": 0.10650975555181504, "signal/brier_reward/group_std_mean": 0.13953691720962524, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01331371944397688, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01331371944397688, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04581173062324524, "signal/confidence_uniqueness_reward/group_std_mean": 0.06800653263926507, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005726466327905655, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005726466327905655, "signal/format_reward/centered_abs_mean": 0.01798502616584301, "signal/format_reward/group_std_mean": 0.03467189371585846, "signal/format_reward/group_zero_std_frac": 0.8555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008992513082921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008992513082921505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017546760383993388, "signal/frontier_aurc_reward/group_std_mean": 0.0026522258296608923, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7416813099989668e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7416813099989668e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10596181005239487, "signal/frontier_coverage_0/group_std_mean": 0.1449078232049942, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_1/centered_abs_mean": 0.10596181005239487, "signal/frontier_coverage_1/group_std_mean": 0.1449078232049942, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_10/centered_abs_mean": 0.10596181005239487, "signal/frontier_coverage_10/group_std_mean": 0.1449078232049942, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_15/centered_abs_mean": 0.10596181005239487, "signal/frontier_coverage_15/group_std_mean": 0.1449078232049942, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_20/centered_abs_mean": 0.10596181005239487, "signal/frontier_coverage_20/group_std_mean": 0.1449078232049942, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_25/centered_abs_mean": 0.1005162313580513, "signal/frontier_coverage_25/group_std_mean": 0.13775036633014678, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015705661149695515, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015705661149695515, "signal/frontier_coverage_5/centered_abs_mean": 0.10596181005239487, "signal/frontier_coverage_5/group_std_mean": 0.1449078232049942, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016556532820686698, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016556532820686698, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.005957813002169132, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.011087938956916333, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0007447266252711415, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0007447266252711415, "step": 145 }, { "calibration/aurc": 0.18209448578532905, "calibration/batch_distribution_entropy": 0.7402098532235222, "calibration/batch_entropy_100bins": 0.8083018729096372, "calibration/batch_entropy_10bins": 0.7402098532235222, "calibration/batch_entropy_50bins": 0.8191539259469073, "calibration/batch_uniqueness": 0.9301449071354948, "calibration/buffer_distribution_entropy": 0.8311244629730232, "calibration/buffer_entropy_100bins": 0.8587688993364191, "calibration/buffer_entropy_10bins": 0.8311244629730232, "calibration/buffer_entropy_50bins": 0.8739160708252068, "calibration/confidence_entropy": 0.5100700509903451, "calibration/coverage@0%": 0.004201001640614507, "calibration/coverage@1%": 0.004201001640614507, "calibration/coverage@10%": 0.34029064401297937, "calibration/coverage@15%": 0.47469158826861174, "calibration/coverage@20%": 0.5657170493820617, "calibration/coverage@25%": 0.7275975167531156, "calibration/coverage@30%": 0.8418513621822916, "calibration/coverage@5%": 0.19152198006106938, "calibration/distribution_entropy_10": 0.7402098532235222, "calibration/distribution_entropy_100": 0.8083018729096372, "calibration/ece": 0.12855990872542672, "calibration/mean_confidence": 0.6749319072008179, "calibration/unique_confidence_per_question": 0.19739583333333333, "calibration/unique_confidences": 75.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009635416666666674, "completions/max_length": 3069.4, "completions/max_terminated_length": 3069.4, "completions/mean_length": 702.6961791992187, "completions/mean_terminated_length": 709.4884643554688, "completions/min_length": 0.0, "completions/min_terminated_length": 192.8, "epoch": 0.3599955000562493, "grad_norm": 0.0004130478191655129, "learning_rate": 1.7469879518072292e-06, "loss": -0.0071, "num_tokens": 318237273.0, "reward": 1.0614855289459229, "reward_std": 0.1201270878314972, "rewards/accuracy_reward": 0.6957465291023255, "rewards/brier_reward": 0.8208768486976623, "rewards/confidence_uniqueness_reward": 0.9183289051055908, "rewards/format_reward": 0.9903645873069763, "rewards/frontier_aurc_reward": -0.0020163535373285413, "rewards/frontier_coverage_0": 0.013487431593239308, "rewards/frontier_coverage_1": 0.013487431593239308, "rewards/frontier_coverage_10": 0.013487431593239308, "rewards/frontier_coverage_15": 0.013487431593239308, "rewards/frontier_coverage_20": 0.012901889439672232, "rewards/frontier_coverage_25": 0.020031385496258734, "rewards/frontier_coverage_5": 0.013487431593239308, "rewards/true_frontier_ece_gap_only_reward": -0.0040603259578347204, "signal/accuracy_reward/centered_abs_mean": 0.15117729753255843, "signal/accuracy_reward/group_std_mean": 0.20031063556671141, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07558864876627922, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07558864876627922, "signal/advantage_abs_mean": 0.08776713460683823, "signal/advantage_pre_scale_abs_mean": 0.08776713460683823, "signal/advantage_pre_scale_std": 0.1591554254293442, "signal/advantage_std": 0.1591554254293442, "signal/brier_reward/centered_abs_mean": 0.10871631652116776, "signal/brier_reward/group_std_mean": 0.14245359599590302, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01358953956514597, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01358953956514597, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042987743765115737, "signal/confidence_uniqueness_reward/group_std_mean": 0.062032976746559144, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005373467970639467, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005373467970639467, "signal/format_reward/centered_abs_mean": 0.015554470103234052, "signal/format_reward/group_std_mean": 0.029216957837343217, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007777235051617026, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007777235051617026, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022927817422896623, "signal/frontier_aurc_reward/group_std_mean": 0.003552949335426092, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.582471472327597e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.582471472327597e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.09796600192785263, "signal/frontier_coverage_0/group_std_mean": 0.13663864582777024, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_1/centered_abs_mean": 0.09796600192785263, "signal/frontier_coverage_1/group_std_mean": 0.13663864582777024, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_10/centered_abs_mean": 0.09796600192785263, "signal/frontier_coverage_10/group_std_mean": 0.13663864582777024, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_15/centered_abs_mean": 0.09796600192785263, "signal/frontier_coverage_15/group_std_mean": 0.13663864582777024, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_20/centered_abs_mean": 0.09587855786085128, "signal/frontier_coverage_20/group_std_mean": 0.13392478972673416, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014981024665758013, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014981024665758013, "signal/frontier_coverage_25/centered_abs_mean": 0.036509061604738234, "signal/frontier_coverage_25/group_std_mean": 0.0526451326906681, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005704540875740349, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005704540875740349, "signal/frontier_coverage_5/centered_abs_mean": 0.09796600192785263, "signal/frontier_coverage_5/group_std_mean": 0.13663864582777024, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015307187801226973, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015307187801226973, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0036625199019908903, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.006539558339864015, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.002777777798473835, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004578149877488613, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004578149877488613, "step": 150 }, { "epoch": 0.3599955000562493, "eval_completions/clipped_ratio": 0.011284722222222229, "eval_completions/max_length": 2426.8333333333335, "eval_completions/max_terminated_length": 2426.8333333333335, "eval_completions/mean_length": 678.6144205729166, "eval_completions/mean_terminated_length": 686.3630065917969, "eval_completions/min_length": 51.5, "eval_completions/min_terminated_length": 242.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 318237273.0, "eval_reward": 1.037120411793391, "eval_reward_std": 0.26037078599135083, "eval_rewards/accuracy_reward": 0.6710069278875986, "eval_rewards/brier_reward": 0.7987116674582163, "eval_rewards/confidence_uniqueness_reward": 0.864409069220225, "eval_rewards/format_reward": 0.9861111144224802, "eval_rewards/frontier_aurc_reward": -0.0020773761401263378, "eval_rewards/frontier_coverage_0": 0.008917404959599176, "eval_rewards/frontier_coverage_1": 0.008917404959599176, "eval_rewards/frontier_coverage_10": 0.008917404959599176, "eval_rewards/frontier_coverage_15": 0.008917404959599176, "eval_rewards/frontier_coverage_20": 0.00835825433023274, "eval_rewards/frontier_coverage_25": 0.019696833721051615, "eval_rewards/frontier_coverage_5": 0.008917404959599176, "eval_rewards/true_frontier_ece_gap_only_reward": -0.003450475827169915, "eval_runtime": 210.1124, "eval_samples_per_second": 4.759, "eval_signal/accuracy_reward/centered_abs_mean": 0.4268120676279068, "eval_signal/accuracy_reward/group_std_mean": 0.46841634809970856, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2134060338139534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2134060338139534, "eval_signal/advantage_abs_mean": 0.22498015811045965, "eval_signal/advantage_pre_scale_abs_mean": 0.22498015811045965, "eval_signal/advantage_pre_scale_std": 0.2599627524614334, "eval_signal/advantage_std": 0.2599627524614334, "eval_signal/brier_reward/centered_abs_mean": 0.19082651287317276, "eval_signal/brier_reward/group_std_mean": 0.23933010548353195, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023853314109146595, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.023853314109146595, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07184251459936301, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10839165622989337, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008980314324920377, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008980314324920377, "eval_signal/format_reward/centered_abs_mean": 0.026475694806625445, "eval_signal/format_reward/group_std_mean": 0.06660978465030591, "eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013237847403312722, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013237847403312722, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033070850962152085, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006110090451935927, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.167320462836263e-05, "eval_signal/frontier_aurc_reward/weight": 0.015625, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.167320462836263e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.1586559092005094, "eval_signal/frontier_coverage_0/group_std_mean": 0.24918479472398758, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_0/weight": 0.015625, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1586559092005094, "eval_signal/frontier_coverage_1/group_std_mean": 0.24918479472398758, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_1/weight": 0.015625, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.1586559092005094, "eval_signal/frontier_coverage_10/group_std_mean": 0.24918479472398758, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_10/weight": 0.015625, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.1586559092005094, "eval_signal/frontier_coverage_15/group_std_mean": 0.24918479472398758, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_15/weight": 0.015625, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12169866388042767, "eval_signal/frontier_coverage_20/group_std_mean": 0.19936797271172205, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019015416231316824, "eval_signal/frontier_coverage_20/weight": 0.015625, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019015416231316824, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.04645684982339541, "eval_signal/frontier_coverage_25/group_std_mean": 0.07699030389388402, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007258882784905533, "eval_signal/frontier_coverage_25/weight": 0.015625, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007258882784905533, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1586559092005094, "eval_signal/frontier_coverage_5/group_std_mean": 0.24918479472398758, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/frontier_coverage_5/weight": 0.015625, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024789985812579594, "eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0035569225437939167, "eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.005802453495562077, "eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004446153179742396, "eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004446153179742396, "eval_steps_per_second": 0.029, "step": 150 }, { "epoch": 0.3599955000562493, "step": 150, "train_probe_completions/clipped_ratio": 0.009375000000000003, "train_probe_completions/max_length": 2373.3333333333335, "train_probe_completions/max_terminated_length": 2373.3333333333335, "train_probe_completions/mean_length": 680.1991678873698, "train_probe_completions/mean_terminated_length": 686.6373596191406, "train_probe_completions/min_length": 70.66666666666667, "train_probe_completions/min_terminated_length": 212.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 318237273.0, "train_probe_reward": 1.055055598417918, "train_probe_reward_std": 0.24644359946250916, "train_probe_rewards/accuracy_reward": 0.6944444477558136, "train_probe_rewards/brier_reward": 0.8187563320000967, "train_probe_rewards/confidence_uniqueness_reward": 0.873372862736384, "train_probe_rewards/format_reward": 0.9904513855775198, "train_probe_rewards/frontier_aurc_reward": -0.001900765870232135, "train_probe_rewards/frontier_coverage_0": 0.012389092764351517, "train_probe_rewards/frontier_coverage_1": 0.012389092764351517, "train_probe_rewards/frontier_coverage_10": 0.012389092764351517, "train_probe_rewards/frontier_coverage_15": 0.012389092764351517, "train_probe_rewards/frontier_coverage_20": 0.014410387520911172, "train_probe_rewards/frontier_coverage_25": 0.024321939796209335, "train_probe_rewards/frontier_coverage_5": 0.012389092764351517, "train_probe_rewards/true_frontier_ece_gap_only_reward": -0.0036154407619809112, "train_probe_runtime": 188.9198, "train_probe_samples_per_second": 5.293, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4124348958333333, "train_probe_signal/accuracy_reward/group_std_mean": 0.45987477401892346, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20621744791666666, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20621744791666666, "train_probe_signal/advantage_abs_mean": 0.21144999066988626, "train_probe_signal/advantage_pre_scale_abs_mean": 0.21144999066988626, "train_probe_signal/advantage_pre_scale_std": 0.24553329994281134, "train_probe_signal/advantage_std": 0.24553329994281134, "train_probe_signal/brier_reward/centered_abs_mean": 0.17376654346783957, "train_probe_signal/brier_reward/group_std_mean": 0.2235363299647967, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021720817933479946, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.021720817933479946, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.060595336059729256, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09015070833265781, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007574417007466157, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007574417007466157, "train_probe_signal/format_reward/centered_abs_mean": 0.018283420087148745, "train_probe_signal/format_reward/group_std_mean": 0.04803628505518039, "train_probe_signal/format_reward/group_zero_std_frac": 0.7500000149011612, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009141710043574372, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.009141710043574372, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.003131849652466675, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0059242877177894115, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8935150819791794e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.015625, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8935150819791794e-05, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.16411924362182617, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.25143779317537945, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_0/weight": 0.015625, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.16411924362182617, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.25143779317537945, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_1/weight": 0.015625, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.16411924362182617, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.25143779317537945, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_10/weight": 0.015625, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.16411924362182617, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.25143779317537945, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_15/weight": 0.015625, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.11266574015220006, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.17991459121306738, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001760402189878126, "train_probe_signal/frontier_coverage_20/weight": 0.015625, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001760402189878126, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.04643759255607923, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.07232892637451489, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000725587383688738, "train_probe_signal/frontier_coverage_25/weight": 0.015625, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000725587383688738, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.16411924362182617, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.25143779317537945, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/frontier_coverage_5/weight": 0.015625, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002564363181591034, "train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0037723184019948044, "train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.00622099117996792, "train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00047153980024935055, "train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00047153980024935055, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.13391643198760078, "calibration/batch_distribution_entropy": 0.7948606211172603, "calibration/batch_entropy_100bins": 0.8368200343599981, "calibration/batch_entropy_10bins": 0.7948606211172603, "calibration/batch_entropy_50bins": 0.8510265931141483, "calibration/batch_uniqueness": 0.944000865460584, "calibration/buffer_distribution_entropy": 0.8213485685365185, "calibration/buffer_entropy_100bins": 0.8620536929835996, "calibration/buffer_entropy_10bins": 0.8213485685365185, "calibration/buffer_entropy_50bins": 0.8727871263863621, "calibration/confidence_entropy": 0.5393953875867862, "calibration/coverage@0%": 0.032740440088172104, "calibration/coverage@1%": 0.032740440088172104, "calibration/coverage@10%": 0.3944664512557533, "calibration/coverage@15%": 0.6908698207865707, "calibration/coverage@20%": 0.7854961519008106, "calibration/coverage@25%": 0.8895916777165667, "calibration/coverage@30%": 0.9335078534031414, "calibration/coverage@5%": 0.24789239883630465, "calibration/distribution_entropy_10": 0.7948606211172603, "calibration/distribution_entropy_100": 0.8368200343599981, "calibration/ece": 0.1253224171661008, "calibration/mean_confidence": 0.6584433504347348, "calibration/unique_confidence_per_question": 0.196875, "calibration/unique_confidences": 75.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 3146.6, "completions/max_terminated_length": 3146.6, "completions/mean_length": 669.9478393554688, "completions/mean_terminated_length": 675.8888916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 214.6, "epoch": 0.3719953500581243, "grad_norm": 0.0004369099042378366, "learning_rate": 1.5963855421686747e-06, "loss": -0.0074, "num_tokens": 329062784.0, "reward": 1.0875563144683837, "reward_std": 0.1229211449623108, "rewards/accuracy_reward": 0.7425347328186035, "rewards/brier_reward": 0.835495126247406, "rewards/confidence_uniqueness_reward": 0.9289066553115845, "rewards/format_reward": 0.9911458492279053, "rewards/frontier_aurc_reward": -0.0014140044804662466, "rewards/frontier_coverage_0": -0.001173873944208026, "rewards/frontier_coverage_1": -0.001173873944208026, "rewards/frontier_coverage_10": -0.001173873944208026, "rewards/frontier_coverage_15": -0.001173873944208026, "rewards/frontier_coverage_20": 0.01185264540836215, "rewards/frontier_coverage_25": 0.028303157165646554, "rewards/frontier_coverage_5": -0.001173873944208026, "rewards/true_frontier_ece_gap_only_reward": -0.0027825822588056328, "signal/accuracy_reward/centered_abs_mean": 0.16309678852558135, "signal/accuracy_reward/group_std_mean": 0.21074391305446624, "signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08154839426279067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08154839426279067, "signal/advantage_abs_mean": 0.09126082807779312, "signal/advantage_pre_scale_abs_mean": 0.09126082807779312, "signal/advantage_pre_scale_std": 0.16173238456249237, "signal/advantage_std": 0.16173238456249237, "signal/brier_reward/centered_abs_mean": 0.10318089425563812, "signal/brier_reward/group_std_mean": 0.13717953413724898, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012897611781954765, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012897611781954765, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03800181671977043, "signal/confidence_uniqueness_reward/group_std_mean": 0.05584709048271179, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004750227089971304, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004750227089971304, "signal/format_reward/centered_abs_mean": 0.014941406436264515, "signal/format_reward/group_std_mean": 0.02782573737204075, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0074707032181322575, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0074707032181322575, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018860452808439731, "signal/frontier_aurc_reward/group_std_mean": 0.0032281734980642795, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.946945751318708e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.946945751318708e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10934300273656845, "signal/frontier_coverage_0/group_std_mean": 0.15232057571411134, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_1/centered_abs_mean": 0.10934300273656845, "signal/frontier_coverage_1/group_std_mean": 0.15232057571411134, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_10/centered_abs_mean": 0.10934300273656845, "signal/frontier_coverage_10/group_std_mean": 0.15232057571411134, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_15/centered_abs_mean": 0.10934300273656845, "signal/frontier_coverage_15/group_std_mean": 0.15232057571411134, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_20/centered_abs_mean": 0.055034608393907544, "signal/frontier_coverage_20/group_std_mean": 0.08059312552213668, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008599157561548054, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008599157561548054, "signal/frontier_coverage_25/centered_abs_mean": 0.033925560861825944, "signal/frontier_coverage_25/group_std_mean": 0.04722090288996696, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005300868884660304, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005300868884660304, "signal/frontier_coverage_5/centered_abs_mean": 0.10934300273656845, "signal/frontier_coverage_5/group_std_mean": 0.15232057571411134, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001708484417758882, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001708484417758882, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.002597217308357358, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0038485261145979168, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00032465216354466976, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00032465216354466976, "step": 155 }, { "calibration/aurc": 0.15233241534879957, "calibration/batch_distribution_entropy": 0.7053979650865262, "calibration/batch_entropy_100bins": 0.7897403240858474, "calibration/batch_entropy_10bins": 0.7053979650865262, "calibration/batch_entropy_50bins": 0.7958750116338504, "calibration/batch_uniqueness": 0.9272095434772781, "calibration/buffer_distribution_entropy": 0.8133656085702008, "calibration/buffer_entropy_100bins": 0.8631911273929548, "calibration/buffer_entropy_10bins": 0.8133656085702008, "calibration/buffer_entropy_50bins": 0.8712631524302715, "calibration/confidence_entropy": 0.5216451024089949, "calibration/coverage@0%": 0.12002745244582987, "calibration/coverage@1%": 0.20824453289395234, "calibration/coverage@10%": 0.45039907142423363, "calibration/coverage@15%": 0.735983934467869, "calibration/coverage@20%": 0.773753280839895, "calibration/coverage@25%": 0.7937007874015748, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.27139264976610233, "calibration/distribution_entropy_10": 0.7053979650865262, "calibration/distribution_entropy_100": 0.7897403240858474, "calibration/ece": 0.13596159831377114, "calibration/mean_confidence": 0.7032028452021251, "calibration/unique_confidence_per_question": 0.17864583333333334, "calibration/unique_confidences": 68.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3056.6, "completions/max_terminated_length": 3056.6, "completions/mean_length": 679.8614624023437, "completions/mean_terminated_length": 688.1098266601563, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.38399520005999926, "grad_norm": 0.0003775613440666348, "learning_rate": 1.4457831325301204e-06, "loss": -0.0102, "num_tokens": 339982084.0, "reward": 1.0505271673202514, "reward_std": 0.1207915186882019, "rewards/accuracy_reward": 0.677256953716278, "rewards/brier_reward": 0.8067374706268311, "rewards/confidence_uniqueness_reward": 0.9252258539199829, "rewards/format_reward": 0.9880208373069763, "rewards/frontier_aurc_reward": -0.0025012485682964327, "rewards/frontier_coverage_0": 0.012539402535185217, "rewards/frontier_coverage_1": 0.012539402535185217, "rewards/frontier_coverage_10": 0.012539402535185217, "rewards/frontier_coverage_15": 0.013295956503134221, "rewards/frontier_coverage_20": 0.016371296532452107, "rewards/frontier_coverage_25": 0.02971927933394909, "rewards/frontier_coverage_5": 0.012539402535185217, "rewards/true_frontier_ece_gap_only_reward": -0.0022376260720193388, "signal/accuracy_reward/centered_abs_mean": 0.1533745676279068, "signal/accuracy_reward/group_std_mean": 0.1958913177251816, "signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0766872838139534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0766872838139534, "signal/advantage_abs_mean": 0.09020575135946274, "signal/advantage_pre_scale_abs_mean": 0.09020575135946274, "signal/advantage_pre_scale_std": 0.16270052194595336, "signal/advantage_std": 0.16270052194595336, "signal/brier_reward/centered_abs_mean": 0.10894776731729508, "signal/brier_reward/group_std_mean": 0.1400896966457367, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013618470914661885, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013618470914661885, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04274830222129822, "signal/confidence_uniqueness_reward/group_std_mean": 0.06329518854618073, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005343537777662277, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005343537777662277, "signal/format_reward/centered_abs_mean": 0.019791666604578494, "signal/format_reward/group_std_mean": 0.03536950312554836, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009895833302289247, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009895833302289247, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028245057445019485, "signal/frontier_aurc_reward/group_std_mean": 0.004615729767829179, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4132902257842946e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4132902257842946e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.11053884625434876, "signal/frontier_coverage_0/group_std_mean": 0.1505269557237625, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_1/centered_abs_mean": 0.11053884625434876, "signal/frontier_coverage_1/group_std_mean": 0.1505269557237625, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_10/centered_abs_mean": 0.11053884625434876, "signal/frontier_coverage_10/group_std_mean": 0.1505269557237625, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_15/centered_abs_mean": 0.09622148275375367, "signal/frontier_coverage_15/group_std_mean": 0.13204507827758788, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001503460668027401, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001503460668027401, "signal/frontier_coverage_20/centered_abs_mean": 0.04334339499473572, "signal/frontier_coverage_20/group_std_mean": 0.06285871043801308, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006772405467927456, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006772405467927456, "signal/frontier_coverage_25/centered_abs_mean": 0.036124877631664276, "signal/frontier_coverage_25/group_std_mean": 0.04845571741461754, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005644512129947543, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005644512129947543, "signal/frontier_coverage_5/centered_abs_mean": 0.11053884625434876, "signal/frontier_coverage_5/group_std_mean": 0.1505269557237625, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017271694727241994, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017271694727241994, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0021982237696647642, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.00320082139223814, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00027477797120809553, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00027477797120809553, "step": 160 }, { "calibration/aurc": 0.18058905327825436, "calibration/batch_distribution_entropy": 0.7767610930080233, "calibration/batch_entropy_100bins": 0.828420695410555, "calibration/batch_entropy_10bins": 0.7767610930080233, "calibration/batch_entropy_50bins": 0.8396945215174878, "calibration/batch_uniqueness": 0.9377851813118999, "calibration/buffer_distribution_entropy": 0.8137825525481095, "calibration/buffer_entropy_100bins": 0.8667738672543628, "calibration/buffer_entropy_10bins": 0.8137825525481095, "calibration/buffer_entropy_50bins": 0.873905604077686, "calibration/confidence_entropy": 0.5159103697988725, "calibration/coverage@0%": 0.0042729555744199215, "calibration/coverage@1%": 0.0042729555744199215, "calibration/coverage@10%": 0.41248231465761176, "calibration/coverage@15%": 0.4753775820599887, "calibration/coverage@20%": 0.532258064516129, "calibration/coverage@25%": 0.7172935454271994, "calibration/coverage@30%": 0.8444380428056248, "calibration/coverage@5%": 0.21247736276174306, "calibration/distribution_entropy_10": 0.7767610930080233, "calibration/distribution_entropy_100": 0.828420695410555, "calibration/ece": 0.12461592848485974, "calibration/mean_confidence": 0.6432186546021172, "calibration/unique_confidence_per_question": 0.20104166666666665, "calibration/unique_confidences": 77.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01362847222222221, "completions/max_length": 3466.0, "completions/max_terminated_length": 3466.0, "completions/mean_length": 714.0564208984375, "completions/mean_terminated_length": 724.0512939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 200.6, "epoch": 0.39599505006187424, "grad_norm": 0.0003885479236487299, "learning_rate": 1.2951807228915664e-06, "loss": -0.0108, "num_tokens": 351347086.0, "reward": 1.0483879327774048, "reward_std": 0.12390840351581574, "rewards/accuracy_reward": 0.6697048664093017, "rewards/brier_reward": 0.8130905270576477, "rewards/confidence_uniqueness_reward": 0.9271872401237488, "rewards/format_reward": 0.9863715410232544, "rewards/frontier_aurc_reward": -0.002153858123347163, "rewards/frontier_coverage_0": 0.028279137797653675, "rewards/frontier_coverage_1": 0.028279137797653675, "rewards/frontier_coverage_10": 0.028279137797653675, "rewards/frontier_coverage_15": 0.028847700357437132, "rewards/frontier_coverage_20": 0.02469187043607235, "rewards/frontier_coverage_25": 0.03973658010363579, "rewards/frontier_coverage_5": 0.028279137797653675, "rewards/true_frontier_ece_gap_only_reward": -0.0030099464114755393, "signal/accuracy_reward/centered_abs_mean": 0.14902886599302292, "signal/accuracy_reward/group_std_mean": 0.19532329142093657, "signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07451443299651146, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07451443299651146, "signal/advantage_abs_mean": 0.09079683572053909, "signal/advantage_pre_scale_abs_mean": 0.09079683572053909, "signal/advantage_pre_scale_std": 0.16342334747314452, "signal/advantage_std": 0.16342334747314452, "signal/brier_reward/centered_abs_mean": 0.11507419794797898, "signal/brier_reward/group_std_mean": 0.15091157853603362, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014384274743497372, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014384274743497372, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04255444556474686, "signal/confidence_uniqueness_reward/group_std_mean": 0.06380771696567536, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005319305695593357, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005319305695593357, "signal/format_reward/centered_abs_mean": 0.022292751632630826, "signal/format_reward/group_std_mean": 0.03928981348872185, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011146375816315413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011146375816315413, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026308648753911256, "signal/frontier_aurc_reward/group_std_mean": 0.0045830888208001856, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.110726367798634e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.110726367798634e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12479231059551239, "signal/frontier_coverage_0/group_std_mean": 0.16951032280921935, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_1/centered_abs_mean": 0.12479231059551239, "signal/frontier_coverage_1/group_std_mean": 0.16951032280921935, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_10/centered_abs_mean": 0.12479231059551239, "signal/frontier_coverage_10/group_std_mean": 0.16951032280921935, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_15/centered_abs_mean": 0.09972788542509078, "signal/frontier_coverage_15/group_std_mean": 0.13676573038101197, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015582482097670435, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015582482097670435, "signal/frontier_coverage_20/centered_abs_mean": 0.05088546723127365, "signal/frontier_coverage_20/group_std_mean": 0.07109279409050942, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007950854254886508, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007950854254886508, "signal/frontier_coverage_25/centered_abs_mean": 0.04205540716648102, "signal/frontier_coverage_25/group_std_mean": 0.05501595437526703, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006571157369762659, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006571157369762659, "signal/frontier_coverage_5/centered_abs_mean": 0.12479231059551239, "signal/frontier_coverage_5/group_std_mean": 0.16951032280921935, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019498798530548811, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019498798530548811, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0027308772783726453, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0037563166581094263, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00034135965979658066, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00034135965979658066, "step": 165 }, { "calibration/aurc": 0.14844663172603637, "calibration/batch_distribution_entropy": 0.6995723211429985, "calibration/batch_entropy_100bins": 0.7860955102263528, "calibration/batch_entropy_10bins": 0.6995723211429985, "calibration/batch_entropy_50bins": 0.7898930908087898, "calibration/batch_uniqueness": 0.9239117880487016, "calibration/buffer_distribution_entropy": 0.8153715309967344, "calibration/buffer_entropy_100bins": 0.8700450976408975, "calibration/buffer_entropy_10bins": 0.8153715309967344, "calibration/buffer_entropy_50bins": 0.8761398327730963, "calibration/confidence_entropy": 0.511929929827141, "calibration/coverage@0%": 0.014253398536593877, "calibration/coverage@1%": 0.014253398536593877, "calibration/coverage@10%": 0.3627433108172956, "calibration/coverage@15%": 0.650761552543238, "calibration/coverage@20%": 0.7580184572251633, "calibration/coverage@25%": 0.9351347617666155, "calibration/coverage@30%": 0.9687830687830689, "calibration/coverage@5%": 0.014253398536593877, "calibration/distribution_entropy_10": 0.6995723211429985, "calibration/distribution_entropy_100": 0.7860955102263528, "calibration/ece": 0.09246575064390247, "calibration/mean_confidence": 0.7047365129909607, "calibration/unique_confidence_per_question": 0.1796875, "calibration/unique_confidences": 69.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009809027777777767, "completions/max_length": 3302.6, "completions/max_terminated_length": 3302.6, "completions/mean_length": 686.3215209960938, "completions/mean_terminated_length": 693.1324462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 190.6, "epoch": 0.4079949000637492, "grad_norm": 0.0003591932763811201, "learning_rate": 1.1445783132530121e-06, "loss": -0.0087, "num_tokens": 362342694.0, "reward": 1.0759626150131225, "reward_std": 0.12116758078336716, "rewards/accuracy_reward": 0.7206597208976746, "rewards/brier_reward": 0.8285124659538269, "rewards/confidence_uniqueness_reward": 0.9224372029304504, "rewards/format_reward": 0.9901909828186035, "rewards/frontier_aurc_reward": -0.002220888831652701, "rewards/frontier_coverage_0": 0.010648279171437024, "rewards/frontier_coverage_1": 0.010648279171437024, "rewards/frontier_coverage_10": 0.010648279171437024, "rewards/frontier_coverage_15": 0.015687369927763938, "rewards/frontier_coverage_20": 0.02136296220123768, "rewards/frontier_coverage_25": 0.0481999009847641, "rewards/frontier_coverage_5": 0.010648279171437024, "rewards/true_frontier_ece_gap_only_reward": -0.002354492200538516, "signal/accuracy_reward/centered_abs_mean": 0.14618055820465087, "signal/accuracy_reward/group_std_mean": 0.19709926843643188, "signal/accuracy_reward/group_zero_std_frac": 0.42777777314186094, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07309027910232543, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07309027910232543, "signal/advantage_abs_mean": 0.08560824990272523, "signal/advantage_pre_scale_abs_mean": 0.08560824990272523, "signal/advantage_pre_scale_std": 0.1597005158662796, "signal/advantage_std": 0.1597005158662796, "signal/brier_reward/centered_abs_mean": 0.10978586375713348, "signal/brier_reward/group_std_mean": 0.14447366297245026, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013723232969641685, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013723232969641685, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04156898036599159, "signal/confidence_uniqueness_reward/group_std_mean": 0.06315687522292138, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005196122545748949, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005196122545748949, "signal/format_reward/centered_abs_mean": 0.017355685867369176, "signal/format_reward/group_std_mean": 0.03438038341701031, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008677842933684588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008677842933684588, "signal/frontier_aurc_reward/centered_abs_mean": 0.002847391273826361, "signal/frontier_aurc_reward/group_std_mean": 0.004856492578983307, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.449048865353689e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.449048865353689e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10908669680356979, "signal/frontier_coverage_0/group_std_mean": 0.1511477291584015, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_1/centered_abs_mean": 0.10908669680356979, "signal/frontier_coverage_1/group_std_mean": 0.1511477291584015, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_10/centered_abs_mean": 0.10908669680356979, "signal/frontier_coverage_10/group_std_mean": 0.1511477291584015, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_15/centered_abs_mean": 0.08291138708591461, "signal/frontier_coverage_15/group_std_mean": 0.11675856113433838, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012954904232174158, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012954904232174158, "signal/frontier_coverage_20/centered_abs_mean": 0.0416046604514122, "signal/frontier_coverage_20/group_std_mean": 0.059697122871875764, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006500728195533156, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006500728195533156, "signal/frontier_coverage_25/centered_abs_mean": 0.041713655740022657, "signal/frontier_coverage_25/group_std_mean": 0.05425269529223442, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000651775870937854, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000651775870937854, "signal/frontier_coverage_5/centered_abs_mean": 0.10908669680356979, "signal/frontier_coverage_5/group_std_mean": 0.1511477291584015, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001704479637555778, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001704479637555778, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0024256525095552204, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0035729790572077034, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00030320656369440255, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00030320656369440255, "step": 170 }, { "calibration/aurc": 0.14461688721979393, "calibration/batch_distribution_entropy": 0.7800736712882704, "calibration/batch_entropy_100bins": 0.8300042856724333, "calibration/batch_entropy_10bins": 0.7800736712882704, "calibration/batch_entropy_50bins": 0.8404826975612186, "calibration/batch_uniqueness": 0.938196954829919, "calibration/buffer_distribution_entropy": 0.8152604036836385, "calibration/buffer_entropy_100bins": 0.8717666012501055, "calibration/buffer_entropy_10bins": 0.8152604036836385, "calibration/buffer_entropy_50bins": 0.8767388825784943, "calibration/confidence_entropy": 0.5208488747591534, "calibration/coverage@0%": 0.012662042165713209, "calibration/coverage@1%": 0.012662042165713209, "calibration/coverage@10%": 0.3192511394437657, "calibration/coverage@15%": 0.5831173985414801, "calibration/coverage@20%": 0.8100232338874823, "calibration/coverage@25%": 0.9231852982671059, "calibration/coverage@30%": 0.9916449086161879, "calibration/coverage@5%": 0.1479950273702389, "calibration/distribution_entropy_10": 0.7800736712882704, "calibration/distribution_entropy_100": 0.8300042856724333, "calibration/ece": 0.0991099850213959, "calibration/mean_confidence": 0.6590511411080155, "calibration/unique_confidence_per_question": 0.20572916666666666, "calibration/unique_confidences": 79.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011197916666666674, "completions/max_length": 3595.2, "completions/max_terminated_length": 3595.2, "completions/mean_length": 714.6521850585938, "completions/mean_terminated_length": 722.7832885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.4199947500656242, "grad_norm": 0.0004064071399625391, "learning_rate": 9.93975903614458e-07, "loss": -0.0108, "num_tokens": 373683455.0, "reward": 1.0710617542266845, "reward_std": 0.12647038847208023, "rewards/accuracy_reward": 0.7127604246139526, "rewards/brier_reward": 0.823624587059021, "rewards/confidence_uniqueness_reward": 0.9248634934425354, "rewards/format_reward": 0.9888020873069763, "rewards/frontier_aurc_reward": -0.0020120171364396812, "rewards/frontier_coverage_0": 0.01165504176169634, "rewards/frontier_coverage_1": 0.01165504176169634, "rewards/frontier_coverage_10": 0.01165504176169634, "rewards/frontier_coverage_15": 0.015564435138367116, "rewards/frontier_coverage_20": 0.021842183917760848, "rewards/frontier_coverage_25": 0.051497886329889296, "rewards/frontier_coverage_5": 0.01165504176169634, "rewards/true_frontier_ece_gap_only_reward": -0.0029332443606108426, "signal/accuracy_reward/centered_abs_mean": 0.15782877802848816, "signal/accuracy_reward/group_std_mean": 0.21180022656917571, "signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07891438901424408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07891438901424408, "signal/advantage_abs_mean": 0.0904485046863556, "signal/advantage_pre_scale_abs_mean": 0.0904485046863556, "signal/advantage_pre_scale_std": 0.16463069915771483, "signal/advantage_std": 0.16463069915771483, "signal/brier_reward/centered_abs_mean": 0.11573301851749421, "signal/brier_reward/group_std_mean": 0.1542545437812805, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014466627314686776, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014466627314686776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042830513417720796, "signal/confidence_uniqueness_reward/group_std_mean": 0.06403593942523003, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0053538141772150995, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0053538141772150995, "signal/format_reward/centered_abs_mean": 0.01941731758415699, "signal/format_reward/group_std_mean": 0.03575590215623379, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009708658792078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009708658792078495, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027021993417292835, "signal/frontier_aurc_reward/group_std_mean": 0.00471522705629468, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2221864714520055e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2221864714520055e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12408257275819778, "signal/frontier_coverage_0/group_std_mean": 0.173751300573349, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_1/centered_abs_mean": 0.12408257275819778, "signal/frontier_coverage_1/group_std_mean": 0.173751300573349, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_10/centered_abs_mean": 0.12408257275819778, "signal/frontier_coverage_10/group_std_mean": 0.173751300573349, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_15/centered_abs_mean": 0.09372715502977372, "signal/frontier_coverage_15/group_std_mean": 0.13314026296138765, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014644867973402143, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014644867973402143, "signal/frontier_coverage_20/centered_abs_mean": 0.04657027423381806, "signal/frontier_coverage_20/group_std_mean": 0.0670913964509964, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007276605349034071, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007276605349034071, "signal/frontier_coverage_25/centered_abs_mean": 0.04459658488631248, "signal/frontier_coverage_25/group_std_mean": 0.05846061035990715, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006968216388486326, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006968216388486326, "signal/frontier_coverage_5/centered_abs_mean": 0.12408257275819778, "signal/frontier_coverage_5/group_std_mean": 0.173751300573349, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019387901993468404, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019387901993468404, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0028843230567872523, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0041553780902177095, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00036054038209840653, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00036054038209840653, "step": 175 }, { "calibration/aurc": 0.10250645380777204, "calibration/batch_distribution_entropy": 0.7497535770739696, "calibration/batch_entropy_100bins": 0.8134197295262965, "calibration/batch_entropy_10bins": 0.7497535770739696, "calibration/batch_entropy_50bins": 0.8202766103654543, "calibration/batch_uniqueness": 0.9356761553909309, "calibration/buffer_distribution_entropy": 0.8144947209468967, "calibration/buffer_entropy_100bins": 0.8727833543328449, "calibration/buffer_entropy_10bins": 0.8144947209468967, "calibration/buffer_entropy_50bins": 0.876997242538901, "calibration/confidence_entropy": 0.5126060799233956, "calibration/coverage@0%": 0.013113817192110735, "calibration/coverage@1%": 0.013113817192110735, "calibration/coverage@10%": 0.5760200171226603, "calibration/coverage@15%": 0.8476765097999672, "calibration/coverage@20%": 0.9179676956091463, "calibration/coverage@25%": 0.9608355091383812, "calibration/coverage@30%": 0.981201044386423, "calibration/coverage@5%": 0.24902806695100796, "calibration/distribution_entropy_10": 0.7497535770739696, "calibration/distribution_entropy_100": 0.8134197295262965, "calibration/ece": 0.12372607921773937, "calibration/mean_confidence": 0.6789990075283177, "calibration/unique_confidence_per_question": 0.19635416666666666, "calibration/unique_confidences": 75.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555537, "completions/max_length": 3533.2, "completions/max_terminated_length": 3533.2, "completions/mean_length": 687.0453247070312, "completions/mean_terminated_length": 694.14697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 214.8, "epoch": 0.4319946000674992, "grad_norm": 0.0003923497861251235, "learning_rate": 8.433734939759036e-07, "loss": -0.009, "num_tokens": 384698185.0, "reward": 1.0713138580322266, "reward_std": 0.12391123622655868, "rewards/accuracy_reward": 0.7142361044883728, "rewards/brier_reward": 0.8178089022636413, "rewards/confidence_uniqueness_reward": 0.9249926686286927, "rewards/format_reward": 0.9896701335906982, "rewards/frontier_aurc_reward": -0.0023395067546516657, "rewards/frontier_coverage_0": 0.009305649372981862, "rewards/frontier_coverage_1": 0.009305649372981862, "rewards/frontier_coverage_10": 0.009305649372981862, "rewards/frontier_coverage_15": 0.013631703774444759, "rewards/frontier_coverage_20": 0.022413885779678823, "rewards/frontier_coverage_25": 0.05289755538105965, "rewards/frontier_coverage_5": 0.009305649372981862, "rewards/true_frontier_ece_gap_only_reward": -0.0033940633293241262, "signal/accuracy_reward/centered_abs_mean": 0.1560980886220932, "signal/accuracy_reward/group_std_mean": 0.20446191132068633, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0780490443110466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0780490443110466, "signal/advantage_abs_mean": 0.09040587842464447, "signal/advantage_pre_scale_abs_mean": 0.09040587842464447, "signal/advantage_pre_scale_std": 0.1639217257499695, "signal/advantage_std": 0.1639217257499695, "signal/brier_reward/centered_abs_mean": 0.1156775861978531, "signal/brier_reward/group_std_mean": 0.15296037197113038, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014459698274731637, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014459698274731637, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04092123620212078, "signal/confidence_uniqueness_reward/group_std_mean": 0.06180307194590569, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005115154525265097, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005115154525265097, "signal/format_reward/centered_abs_mean": 0.017681206576526165, "signal/format_reward/group_std_mean": 0.03354543596506119, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008840603288263083, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008840603288263083, "signal/frontier_aurc_reward/centered_abs_mean": 0.002855647308751941, "signal/frontier_aurc_reward/group_std_mean": 0.004864505957812071, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4619489199249077e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4619489199249077e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12355190813541413, "signal/frontier_coverage_0/group_std_mean": 0.16903219521045684, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_1/centered_abs_mean": 0.12355190813541413, "signal/frontier_coverage_1/group_std_mean": 0.16903219521045684, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_10/centered_abs_mean": 0.12355190813541413, "signal/frontier_coverage_10/group_std_mean": 0.16903219521045684, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_15/centered_abs_mean": 0.08319898694753647, "signal/frontier_coverage_15/group_std_mean": 0.11640357077121735, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012999841710552573, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012999841710552573, "signal/frontier_coverage_20/centered_abs_mean": 0.04654121547937393, "signal/frontier_coverage_20/group_std_mean": 0.06476506888866425, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007272064918652177, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007272064918652177, "signal/frontier_coverage_25/centered_abs_mean": 0.048268646001815796, "signal/frontier_coverage_25/group_std_mean": 0.06180224493145943, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007541975937783718, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007541975937783718, "signal/frontier_coverage_5/centered_abs_mean": 0.12355190813541413, "signal/frontier_coverage_5/group_std_mean": 0.16903219521045684, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019304985646158457, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019304985646158457, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0031099628657102587, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004470847826451063, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00038874535821378233, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00038874535821378233, "step": 180 }, { "calibration/aurc": 0.16988037415763974, "calibration/batch_distribution_entropy": 0.7940710514827269, "calibration/batch_entropy_100bins": 0.8368481344782455, "calibration/batch_entropy_10bins": 0.7940710514827269, "calibration/batch_entropy_50bins": 0.8476841669994413, "calibration/batch_uniqueness": 0.9411219051737953, "calibration/buffer_distribution_entropy": 0.8081097969439645, "calibration/buffer_entropy_100bins": 0.8695779315744969, "calibration/buffer_entropy_10bins": 0.8081097969439645, "calibration/buffer_entropy_50bins": 0.873141871107264, "calibration/confidence_entropy": 0.5195127592582154, "calibration/coverage@0%": 0.022429873118014296, "calibration/coverage@1%": 0.022429873118014296, "calibration/coverage@10%": 0.15776704580497936, "calibration/coverage@15%": 0.47283910007332636, "calibration/coverage@20%": 0.8386187835365227, "calibration/coverage@25%": 0.8956120693790559, "calibration/coverage@30%": 0.9506561679790027, "calibration/coverage@5%": 0.044304873118014294, "calibration/distribution_entropy_10": 0.7940710514827269, "calibration/distribution_entropy_100": 0.8368481344782455, "calibration/ece": 0.11371638934936182, "calibration/mean_confidence": 0.6561408147419053, "calibration/unique_confidence_per_question": 0.2109375, "calibration/unique_confidences": 81.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00980902777777779, "completions/max_length": 3568.8, "completions/max_terminated_length": 3568.8, "completions/mean_length": 694.918212890625, "completions/mean_terminated_length": 701.8487915039062, "completions/min_length": 0.0, "completions/min_terminated_length": 187.8, "epoch": 0.44399445006937416, "grad_norm": 0.0004112598253414035, "learning_rate": 6.927710843373495e-07, "loss": -0.0072, "num_tokens": 395793691.0, "reward": 1.0613837242126465, "reward_std": 0.12879109233617783, "rewards/accuracy_reward": 0.692187488079071, "rewards/brier_reward": 0.8143394708633422, "rewards/confidence_uniqueness_reward": 0.9319449424743652, "rewards/format_reward": 0.9901041746139526, "rewards/frontier_aurc_reward": -0.0021198054775595663, "rewards/frontier_coverage_0": 0.01427230816334486, "rewards/frontier_coverage_1": 0.01427230816334486, "rewards/frontier_coverage_10": 0.01427230816334486, "rewards/frontier_coverage_15": 0.016747461259365083, "rewards/frontier_coverage_20": 0.02341715954244137, "rewards/frontier_coverage_25": 0.05537489578127861, "rewards/frontier_coverage_5": 0.01427230816334486, "rewards/true_frontier_ece_gap_only_reward": -0.003194707864895463, "signal/accuracy_reward/centered_abs_mean": 0.16829426884651183, "signal/accuracy_reward/group_std_mean": 0.2190181851387024, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08414713442325591, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08414713442325591, "signal/advantage_abs_mean": 0.09635478109121323, "signal/advantage_pre_scale_abs_mean": 0.09635478109121323, "signal/advantage_pre_scale_std": 0.1639193296432495, "signal/advantage_std": 0.1639193296432495, "signal/brier_reward/centered_abs_mean": 0.12017861008644104, "signal/brier_reward/group_std_mean": 0.15563611090183258, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01502232626080513, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01502232626080513, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037514998018741606, "signal/confidence_uniqueness_reward/group_std_mean": 0.05578840374946594, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004689374752342701, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004689374752342701, "signal/format_reward/centered_abs_mean": 0.016894531436264515, "signal/format_reward/group_std_mean": 0.030554963275790215, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008447265718132257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008447265718132257, "signal/frontier_aurc_reward/centered_abs_mean": 0.002720799436792731, "signal/frontier_aurc_reward/group_std_mean": 0.004833174217492342, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2512491199886425e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2512491199886425e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.130600044131279, "signal/frontier_coverage_0/group_std_mean": 0.18123140037059784, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_1/centered_abs_mean": 0.130600044131279, "signal/frontier_coverage_1/group_std_mean": 0.18123140037059784, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_10/centered_abs_mean": 0.130600044131279, "signal/frontier_coverage_10/group_std_mean": 0.18123140037059784, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_15/centered_abs_mean": 0.08408356457948685, "signal/frontier_coverage_15/group_std_mean": 0.11986269503831863, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001313805696554482, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001313805696554482, "signal/frontier_coverage_20/centered_abs_mean": 0.0467585064470768, "signal/frontier_coverage_20/group_std_mean": 0.06627060770988465, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000730601663235575, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000730601663235575, "signal/frontier_coverage_25/centered_abs_mean": 0.04904806688427925, "signal/frontier_coverage_25/group_std_mean": 0.06319007501006127, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007663760450668633, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007663760450668633, "signal/frontier_coverage_5/centered_abs_mean": 0.130600044131279, "signal/frontier_coverage_5/group_std_mean": 0.18123140037059784, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002040625689551234, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002040625689551234, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.003226568968966603, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004763441625982523, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004033211211208254, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004033211211208254, "step": 185 }, { "calibration/aurc": 0.16887821111438306, "calibration/batch_distribution_entropy": 0.7166309619833362, "calibration/batch_entropy_100bins": 0.793829835616982, "calibration/batch_entropy_10bins": 0.7166309619833362, "calibration/batch_entropy_50bins": 0.7992505076218754, "calibration/batch_uniqueness": 0.9261153678295246, "calibration/buffer_distribution_entropy": 0.800176021818779, "calibration/buffer_entropy_100bins": 0.8647032984957514, "calibration/buffer_entropy_10bins": 0.800176021818779, "calibration/buffer_entropy_50bins": 0.8680643724938966, "calibration/confidence_entropy": 0.4969127807971727, "calibration/coverage@0%": 0.013662280701754386, "calibration/coverage@1%": 0.013662280701754386, "calibration/coverage@10%": 0.38303179824561406, "calibration/coverage@15%": 0.5142105263157895, "calibration/coverage@20%": 0.5489473684210526, "calibration/coverage@25%": 0.938843201754386, "calibration/coverage@30%": 0.9942105263157895, "calibration/coverage@5%": 0.0999780701754386, "calibration/distribution_entropy_10": 0.7166309619833362, "calibration/distribution_entropy_100": 0.793829835616982, "calibration/ece": 0.11243645289887176, "calibration/mean_confidence": 0.6973970449081458, "calibration/unique_confidence_per_question": 0.18958333333333335, "calibration/unique_confidences": 72.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00512152777777779, "completions/max_length": 3413.8, "completions/max_terminated_length": 3413.8, "completions/mean_length": 683.5370727539063, "completions/mean_terminated_length": 687.0613037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 192.6, "epoch": 0.45599430007124914, "grad_norm": 0.0003837902913801372, "learning_rate": 5.421686746987952e-07, "loss": -0.0035, "num_tokens": 406750982.0, "reward": 1.0861162424087525, "reward_std": 0.11900279521942139, "rewards/accuracy_reward": 0.7318576335906982, "rewards/brier_reward": 0.8348490834236145, "rewards/confidence_uniqueness_reward": 0.92904052734375, "rewards/format_reward": 0.9947916746139527, "rewards/frontier_aurc_reward": -0.0019866148009896278, "rewards/frontier_coverage_0": 0.013782516145147384, "rewards/frontier_coverage_1": 0.013782516145147384, "rewards/frontier_coverage_10": 0.014211940788663923, "rewards/frontier_coverage_15": 0.018956656288355588, "rewards/frontier_coverage_20": 0.02874315045773983, "rewards/frontier_coverage_25": 0.07212998867034912, "rewards/frontier_coverage_5": 0.013782516145147384, "rewards/true_frontier_ece_gap_only_reward": -0.003232320211827755, "signal/accuracy_reward/centered_abs_mean": 0.16196288764476777, "signal/accuracy_reward/group_std_mean": 0.21733182072639465, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08098144382238388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08098144382238388, "signal/advantage_abs_mean": 0.08545112162828446, "signal/advantage_pre_scale_abs_mean": 0.08545112162828446, "signal/advantage_pre_scale_std": 0.15151307582855225, "signal/advantage_std": 0.15151307582855225, "signal/brier_reward/centered_abs_mean": 0.10914837270975113, "signal/brier_reward/group_std_mean": 0.14498610198497772, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01364354658871889, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01364354658871889, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03477521277964115, "signal/confidence_uniqueness_reward/group_std_mean": 0.05167415216565132, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004346901597455144, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004346901597455144, "signal/format_reward/centered_abs_mean": 0.009429253498092293, "signal/format_reward/group_std_mean": 0.021391174383461477, "signal/format_reward/group_zero_std_frac": 0.8972222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004714626749046147, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004714626749046147, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025213475339114664, "signal/frontier_aurc_reward/group_std_mean": 0.004476012662053108, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.939605521736666e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.939605521736666e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13185926526784897, "signal/frontier_coverage_0/group_std_mean": 0.18062789738178253, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00206030101981014, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00206030101981014, "signal/frontier_coverage_1/centered_abs_mean": 0.13185926526784897, "signal/frontier_coverage_1/group_std_mean": 0.18062789738178253, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00206030101981014, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00206030101981014, "signal/frontier_coverage_10/centered_abs_mean": 0.12977752983570098, "signal/frontier_coverage_10/group_std_mean": 0.1778223305940628, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002027773903682828, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002027773903682828, "signal/frontier_coverage_15/centered_abs_mean": 0.08260580152273178, "signal/frontier_coverage_15/group_std_mean": 0.11576226651668549, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001290715648792684, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001290715648792684, "signal/frontier_coverage_20/centered_abs_mean": 0.04762231633067131, "signal/frontier_coverage_20/group_std_mean": 0.06589499711990357, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007440986926667392, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007440986926667392, "signal/frontier_coverage_25/centered_abs_mean": 0.05049858167767525, "signal/frontier_coverage_25/group_std_mean": 0.06524901390075684, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007890403387136758, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007890403387136758, "signal/frontier_coverage_5/centered_abs_mean": 0.13185926526784897, "signal/frontier_coverage_5/group_std_mean": 0.18062789738178253, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00206030101981014, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00206030101981014, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.003113184357061982, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004526341799646616, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00038914804463274777, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00038914804463274777, "step": 190 }, { "calibration/aurc": 0.2062296307283377, "calibration/batch_distribution_entropy": 0.8093306883536086, "calibration/batch_entropy_100bins": 0.8432700346966481, "calibration/batch_entropy_10bins": 0.8093306883536086, "calibration/batch_entropy_50bins": 0.859267476518579, "calibration/batch_uniqueness": 0.9426119233178885, "calibration/buffer_distribution_entropy": 0.7945642543079567, "calibration/buffer_entropy_100bins": 0.8605465543658124, "calibration/buffer_entropy_10bins": 0.7945642543079567, "calibration/buffer_entropy_50bins": 0.8645292173196768, "calibration/confidence_entropy": 0.5176270953297042, "calibration/coverage@0%": 0.004699738903394255, "calibration/coverage@1%": 0.004699738903394255, "calibration/coverage@10%": 0.28507615891758575, "calibration/coverage@15%": 0.4576479260387029, "calibration/coverage@20%": 0.5506327631461108, "calibration/coverage@25%": 0.617482242623319, "calibration/coverage@30%": 0.772239810615784, "calibration/coverage@5%": 0.09712793733681462, "calibration/distribution_entropy_10": 0.8093306883536086, "calibration/distribution_entropy_100": 0.8432700346966481, "calibration/ece": 0.13137105508504543, "calibration/mean_confidence": 0.6472678487210273, "calibration/unique_confidence_per_question": 0.21197916666666666, "calibration/unique_confidences": 81.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011545138888888884, "completions/max_length": 3631.2, "completions/max_terminated_length": 3631.2, "completions/mean_length": 703.418408203125, "completions/mean_terminated_length": 711.7278564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.46799415007312406, "grad_norm": 0.00042763527017086744, "learning_rate": 3.91566265060241e-07, "loss": -0.011, "num_tokens": 417935226.0, "reward": 1.0512622594833374, "reward_std": 0.12501877546310425, "rewards/accuracy_reward": 0.6756076455116272, "rewards/brier_reward": 0.8063122510910035, "rewards/confidence_uniqueness_reward": 0.9281278252601624, "rewards/format_reward": 0.9883680462837219, "rewards/frontier_aurc_reward": -0.0024975771084427834, "rewards/frontier_coverage_0": 0.018676279671490194, "rewards/frontier_coverage_1": 0.018676279671490194, "rewards/frontier_coverage_10": 0.01876285169273615, "rewards/frontier_coverage_15": 0.0209655387327075, "rewards/frontier_coverage_20": 0.02632690779864788, "rewards/frontier_coverage_25": 0.06297426149249077, "rewards/frontier_coverage_5": 0.018676279671490194, "rewards/true_frontier_ece_gap_only_reward": -0.0030649449676275254, "signal/accuracy_reward/centered_abs_mean": 0.156504987180233, "signal/accuracy_reward/group_std_mean": 0.20543068647384644, "signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0782524935901165, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0782524935901165, "signal/advantage_abs_mean": 0.09245173931121826, "signal/advantage_pre_scale_abs_mean": 0.09245173931121826, "signal/advantage_pre_scale_std": 0.1626460701227188, "signal/advantage_std": 0.1626460701227188, "signal/brier_reward/centered_abs_mean": 0.1210327297449112, "signal/brier_reward/group_std_mean": 0.15611167550086974, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0151290912181139, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0151290912181139, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04194310083985329, "signal/confidence_uniqueness_reward/group_std_mean": 0.06149864494800568, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005242887604981661, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005242887604981661, "signal/format_reward/centered_abs_mean": 0.01963975690305233, "signal/format_reward/group_std_mean": 0.03504836894571781, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009819878451526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009819878451526164, "signal/frontier_aurc_reward/centered_abs_mean": 0.003186128893867135, "signal/frontier_aurc_reward/group_std_mean": 0.005603937339037657, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9783263966673986e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9783263966673986e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12809589505195618, "signal/frontier_coverage_0/group_std_mean": 0.17424156665802001, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020014983601868153, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020014983601868153, "signal/frontier_coverage_1/centered_abs_mean": 0.12809589505195618, "signal/frontier_coverage_1/group_std_mean": 0.17424156665802001, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020014983601868153, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020014983601868153, "signal/frontier_coverage_10/centered_abs_mean": 0.12744116485118867, "signal/frontier_coverage_10/group_std_mean": 0.17343433499336242, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001991268200799823, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001991268200799823, "signal/frontier_coverage_15/centered_abs_mean": 0.07363787293434143, "signal/frontier_coverage_15/group_std_mean": 0.10297145694494247, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011505917645990849, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011505917645990849, "signal/frontier_coverage_20/centered_abs_mean": 0.044591452926397324, "signal/frontier_coverage_20/group_std_mean": 0.06145058870315552, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006967414519749582, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006967414519749582, "signal/frontier_coverage_25/centered_abs_mean": 0.052361331135034564, "signal/frontier_coverage_25/group_std_mean": 0.0672803521156311, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008181457989849151, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008181457989849151, "signal/frontier_coverage_5/centered_abs_mean": 0.12809589505195618, "signal/frontier_coverage_5/group_std_mean": 0.17424156665802001, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020014983601868153, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020014983601868153, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0031045635230839254, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004452465567737818, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0003880704403854907, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0003880704403854907, "step": 195 }, { "calibration/aurc": 0.18052983140791248, "calibration/batch_distribution_entropy": 0.7398158763396785, "calibration/batch_entropy_100bins": 0.8093075678083164, "calibration/batch_entropy_10bins": 0.7398158763396785, "calibration/batch_entropy_50bins": 0.8162096039603671, "calibration/batch_uniqueness": 0.9336981234577946, "calibration/buffer_distribution_entropy": 0.7927739625462163, "calibration/buffer_entropy_100bins": 0.8589941803683226, "calibration/buffer_entropy_10bins": 0.7927739625462163, "calibration/buffer_entropy_50bins": 0.8634857187639527, "calibration/confidence_entropy": 0.5153811073665431, "calibration/coverage@0%": 0.0062856144931519396, "calibration/coverage@1%": 0.0062856144931519396, "calibration/coverage@10%": 0.1866731368237827, "calibration/coverage@15%": 0.38207938252943063, "calibration/coverage@20%": 0.838360745614035, "calibration/coverage@25%": 0.9291008771929825, "calibration/coverage@30%": 0.9573848684210526, "calibration/coverage@5%": 0.0062856144931519396, "calibration/distribution_entropy_10": 0.7398158763396785, "calibration/distribution_entropy_100": 0.8093075678083164, "calibration/ece": 0.109616134077216, "calibration/mean_confidence": 0.6974732587107646, "calibration/unique_confidence_per_question": 0.19479166666666664, "calibration/unique_confidences": 74.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007899305555555559, "completions/max_length": 3355.0, "completions/max_terminated_length": 3355.0, "completions/mean_length": 681.3518310546875, "completions/mean_terminated_length": 686.8287475585937, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.47999400007499904, "grad_norm": 0.0004486938123591244, "learning_rate": 2.409638554216868e-07, "loss": -0.0059, "num_tokens": 428852207.0, "reward": 1.070695161819458, "reward_std": 0.11748828440904617, "rewards/accuracy_reward": 0.7063368082046508, "rewards/brier_reward": 0.8224664568901062, "rewards/confidence_uniqueness_reward": 0.9281678915023803, "rewards/format_reward": 0.9921006917953491, "rewards/frontier_aurc_reward": -0.002603556914255023, "rewards/frontier_coverage_0": 0.01670327754691243, "rewards/frontier_coverage_1": 0.01670327754691243, "rewards/frontier_coverage_10": 0.016874231677502394, "rewards/frontier_coverage_15": 0.021660929918289183, "rewards/frontier_coverage_20": 0.030010566860437394, "rewards/frontier_coverage_25": 0.07551120072603226, "rewards/frontier_coverage_5": 0.01670327754691243, "rewards/true_frontier_ece_gap_only_reward": -0.0027688577305525542, "signal/accuracy_reward/centered_abs_mean": 0.14654405266046525, "signal/accuracy_reward/group_std_mean": 0.19502569139003753, "signal/accuracy_reward/group_zero_std_frac": 0.4277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07327202633023262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07327202633023262, "signal/advantage_abs_mean": 0.08604095876216888, "signal/advantage_pre_scale_abs_mean": 0.08604095876216888, "signal/advantage_pre_scale_std": 0.15435749292373657, "signal/advantage_std": 0.15435749292373657, "signal/brier_reward/centered_abs_mean": 0.10942392647266388, "signal/brier_reward/group_std_mean": 0.14457024335861207, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013677990809082986, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013677990809082986, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03794047012925148, "signal/confidence_uniqueness_reward/group_std_mean": 0.055311404168605804, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004742558766156435, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004742558766156435, "signal/format_reward/centered_abs_mean": 0.01360134556889534, "signal/format_reward/group_std_mean": 0.025955809652805327, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00680067278444767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00680067278444767, "signal/frontier_aurc_reward/centered_abs_mean": 0.003289140481501818, "signal/frontier_aurc_reward/group_std_mean": 0.005822925828397274, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1392820023465904e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1392820023465904e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1157380223274231, "signal/frontier_coverage_0/group_std_mean": 0.16240898966789247, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001808406598865986, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001808406598865986, "signal/frontier_coverage_1/centered_abs_mean": 0.1157380223274231, "signal/frontier_coverage_1/group_std_mean": 0.16240898966789247, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001808406598865986, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001808406598865986, "signal/frontier_coverage_10/centered_abs_mean": 0.11215617209672928, "signal/frontier_coverage_10/group_std_mean": 0.15783893167972565, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001752440189011395, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001752440189011395, "signal/frontier_coverage_15/centered_abs_mean": 0.06608396619558335, "signal/frontier_coverage_15/group_std_mean": 0.09578151851892472, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010325619718059898, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010325619718059898, "signal/frontier_coverage_20/centered_abs_mean": 0.04160864725708961, "signal/frontier_coverage_20/group_std_mean": 0.058051402866840365, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006501351133920252, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006501351133920252, "signal/frontier_coverage_25/centered_abs_mean": 0.05538794472813606, "signal/frontier_coverage_25/group_std_mean": 0.07133743911981583, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008654366363771259, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008654366363771259, "signal/frontier_coverage_5/centered_abs_mean": 0.1157380223274231, "signal/frontier_coverage_5/group_std_mean": 0.16240898966789247, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001808406598865986, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001808406598865986, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0029893687460571527, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004549006605520845, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0003736710932571441, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0003736710932571441, "step": 200 }, { "epoch": 0.47999400007499904, "eval_completions/clipped_ratio": 0.008680555555555544, "eval_completions/max_length": 2405.5, "eval_completions/max_terminated_length": 2405.5, "eval_completions/mean_length": 691.6689249674479, "eval_completions/mean_terminated_length": 697.7305806477865, "eval_completions/min_length": 56.833333333333336, "eval_completions/min_terminated_length": 249.66666666666666, "eval_loss": 0.0, "eval_num_tokens": 428852207.0, "eval_reward": 1.0510073900222778, "eval_reward_std": 0.2505972956617673, "eval_rewards/accuracy_reward": 0.6796875, "eval_rewards/brier_reward": 0.8202938636144003, "eval_rewards/confidence_uniqueness_reward": 0.8755098978678385, "eval_rewards/format_reward": 0.9913194378217062, "eval_rewards/frontier_aurc_reward": -0.002243300104358544, "eval_rewards/frontier_coverage_0": 0.029487861630817253, "eval_rewards/frontier_coverage_1": 0.029487861630817253, "eval_rewards/frontier_coverage_10": 0.02955207011351983, "eval_rewards/frontier_coverage_15": 0.02661541321625312, "eval_rewards/frontier_coverage_20": 0.03148760460317135, "eval_rewards/frontier_coverage_25": 0.0767225877692302, "eval_rewards/frontier_coverage_5": 0.029487861630817253, "eval_rewards/true_frontier_ece_gap_only_reward": -0.0030973663087934256, "eval_runtime": 184.977, "eval_samples_per_second": 5.406, "eval_signal/accuracy_reward/centered_abs_mean": 0.4178059895833333, "eval_signal/accuracy_reward/group_std_mean": 0.4625024398167928, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20890299479166666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20890299479166666, "eval_signal/advantage_abs_mean": 0.21793826669454575, "eval_signal/advantage_pre_scale_abs_mean": 0.21793826669454575, "eval_signal/advantage_pre_scale_std": 0.24974885831276575, "eval_signal/advantage_std": 0.24974885831276575, "eval_signal/brier_reward/centered_abs_mean": 0.18156319856643677, "eval_signal/brier_reward/group_std_mean": 0.2326709752281507, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022695399820804596, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022695399820804596, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0633502888182799, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08953885920345783, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007918786102284988, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007918786102284988, "eval_signal/format_reward/centered_abs_mean": 0.01671006918574373, "eval_signal/format_reward/group_std_mean": 0.04611522859583298, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008355034592871865, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.008355034592871865, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0037845204351469874, "eval_signal/frontier_aurc_reward/group_std_mean": 0.00772972172126174, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.913313179917168e-05, "eval_signal/frontier_aurc_reward/weight": 0.015625, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.913313179917168e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.1768440529704094, "eval_signal/frontier_coverage_0/group_std_mean": 0.275893231232961, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027631883276626468, "eval_signal/frontier_coverage_0/weight": 0.015625, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027631883276626468, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1768440529704094, "eval_signal/frontier_coverage_1/group_std_mean": 0.275893231232961, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027631883276626468, "eval_signal/frontier_coverage_1/weight": 0.015625, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027631883276626468, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.17100265125433603, "eval_signal/frontier_coverage_10/group_std_mean": 0.2681894302368164, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026719164258490005, "eval_signal/frontier_coverage_10/weight": 0.015625, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026719164258490005, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.09313676009575526, "eval_signal/frontier_coverage_15/group_std_mean": 0.16024632503588995, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001455261876496176, "eval_signal/frontier_coverage_15/weight": 0.015625, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001455261876496176, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.05307722526292006, "eval_signal/frontier_coverage_20/group_std_mean": 0.08430640151103337, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008293316447331259, "eval_signal/frontier_coverage_20/weight": 0.015625, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008293316447331259, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09919273108243942, "eval_signal/frontier_coverage_25/group_std_mean": 0.11935225501656532, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001549886423163116, "eval_signal/frontier_coverage_25/weight": 0.015625, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001549886423163116, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1768440529704094, "eval_signal/frontier_coverage_5/group_std_mean": 0.275893231232961, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027631883276626468, "eval_signal/frontier_coverage_5/weight": 0.015625, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027631883276626468, "eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.00442255346570164, "eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0076944112467269106, "eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.000552819183212705, "eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.000552819183212705, "eval_steps_per_second": 0.032, "step": 200 }, { "epoch": 0.47999400007499904, "step": 200, "train_probe_completions/clipped_ratio": 0.012847222222222232, "train_probe_completions/max_length": 2355.3333333333335, "train_probe_completions/max_terminated_length": 2355.3333333333335, "train_probe_completions/mean_length": 680.9791870117188, "train_probe_completions/mean_terminated_length": 689.9359436035156, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 217.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 428852207.0, "train_probe_reward": 1.067217191060384, "train_probe_reward_std": 0.25106702248255414, "train_probe_rewards/accuracy_reward": 0.7170139054457346, "train_probe_rewards/brier_reward": 0.8235729734102885, "train_probe_rewards/confidence_uniqueness_reward": 0.8701435724894205, "train_probe_rewards/format_reward": 0.9895833333333334, "train_probe_rewards/frontier_aurc_reward": -0.002555853434993575, "train_probe_rewards/frontier_coverage_0": 0.00876838636274139, "train_probe_rewards/frontier_coverage_1": 0.00876838636274139, "train_probe_rewards/frontier_coverage_10": 0.009510708196709553, "train_probe_rewards/frontier_coverage_15": 0.017479141689060878, "train_probe_rewards/frontier_coverage_20": 0.02941159127900998, "train_probe_rewards/frontier_coverage_25": 0.08159822722276051, "train_probe_rewards/frontier_coverage_5": 0.00876838636274139, "train_probe_rewards/true_frontier_ece_gap_only_reward": -0.002586768241599202, "train_probe_runtime": 203.222, "train_probe_samples_per_second": 4.921, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3974609375, "train_probe_signal/accuracy_reward/group_std_mean": 0.4516189793745677, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19873046875, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19873046875, "train_probe_signal/advantage_abs_mean": 0.21062870572010675, "train_probe_signal/advantage_pre_scale_abs_mean": 0.21062870572010675, "train_probe_signal/advantage_pre_scale_std": 0.24998972316582999, "train_probe_signal/advantage_std": 0.24998972316582999, "train_probe_signal/brier_reward/centered_abs_mean": 0.1768066460887591, "train_probe_signal/brier_reward/group_std_mean": 0.2324258784453074, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022100830761094887, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.022100830761094887, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06669201205174129, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1005245956281821, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00833650150646766, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00833650150646766, "train_probe_signal/format_reward/centered_abs_mean": 0.02007378451526165, "train_probe_signal/format_reward/group_std_mean": 0.05593615584075451, "train_probe_signal/format_reward/group_zero_std_frac": 0.6944444676240286, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010036892257630825, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010036892257630825, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.004501550691202283, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.010145407247667512, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.033672955003567e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.015625, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.033672955003567e-05, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.1579719434181849, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.25707169622182846, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002468311615909139, "train_probe_signal/frontier_coverage_0/weight": 0.015625, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002468311615909139, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.1579719434181849, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.25707169622182846, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002468311615909139, "train_probe_signal/frontier_coverage_1/weight": 0.015625, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002468311615909139, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.15405935049057007, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.25156734387079877, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024071773514151573, "train_probe_signal/frontier_coverage_10/weight": 0.015625, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024071773514151573, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.08175658682982127, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.14768946915864944, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012774466692159574, "train_probe_signal/frontier_coverage_15/weight": 0.015625, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012774466692159574, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.04819720300535361, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.07652890309691429, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007530812969586501, "train_probe_signal/frontier_coverage_20/weight": 0.015625, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007530812969586501, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.09637204806009929, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.11713628967603047, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015058132509390514, "train_probe_signal/frontier_coverage_25/weight": 0.015625, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015058132509390514, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.1579719434181849, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.25707169622182846, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002468311615909139, "train_probe_signal/frontier_coverage_5/weight": 0.015625, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002468311615909139, "train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0036215446889400482, "train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.006501481092224519, "train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00045269308611750603, "train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125, "train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00045269308611750603, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.16907717783248571, "calibration/batch_distribution_entropy": 0.7094331746984237, "calibration/batch_entropy_100bins": 0.7936049195126413, "calibration/batch_entropy_10bins": 0.7094331746984237, "calibration/batch_entropy_50bins": 0.7939230371346102, "calibration/batch_uniqueness": 0.924174709187571, "calibration/buffer_distribution_entropy": 0.7921673058072191, "calibration/buffer_entropy_100bins": 0.8584895422787383, "calibration/buffer_entropy_10bins": 0.7921673058072191, "calibration/buffer_entropy_50bins": 0.8628322959175796, "calibration/confidence_entropy": 0.49239157203877826, "calibration/coverage@0%": 0.010430265448215839, "calibration/coverage@1%": 0.010430265448215839, "calibration/coverage@10%": 0.11064104656222802, "calibration/coverage@15%": 0.5501699847693647, "calibration/coverage@20%": 0.8597367275892079, "calibration/coverage@25%": 0.8936221714534378, "calibration/coverage@30%": 0.9400198542210617, "calibration/coverage@5%": 0.010430265448215839, "calibration/distribution_entropy_10": 0.7094331746984237, "calibration/distribution_entropy_100": 0.7936049195126413, "calibration/ece": 0.10543889063662855, "calibration/mean_confidence": 0.6970287687410912, "calibration/unique_confidence_per_question": 0.19322916666666667, "calibration/unique_confidences": 74.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 2892.6, "completions/max_terminated_length": 2892.6, "completions/mean_length": 690.477685546875, "completions/mean_terminated_length": 696.5508056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 157.6, "epoch": 0.491993850076874, "grad_norm": 0.0003582312201615423, "learning_rate": 9.036144578313253e-08, "loss": -0.0072, "num_tokens": 439872462.0, "reward": 1.092501163482666, "reward_std": 0.11384595781564713, "rewards/accuracy_reward": 0.7523437380790711, "rewards/brier_reward": 0.828589677810669, "rewards/confidence_uniqueness_reward": 0.9259551286697387, "rewards/format_reward": 0.9912326335906982, "rewards/frontier_aurc_reward": -0.00198111105710268, "rewards/frontier_coverage_0": -0.00452432045713067, "rewards/frontier_coverage_1": -0.00452432045713067, "rewards/frontier_coverage_10": -0.003602027613669634, "rewards/frontier_coverage_15": 0.012093347311019898, "rewards/frontier_coverage_20": 0.03151162005960941, "rewards/frontier_coverage_25": 0.09083455055952072, "rewards/frontier_coverage_5": -0.00452432045713067, "rewards/true_frontier_ece_gap_only_reward": -0.0032516193110495805, "signal/accuracy_reward/centered_abs_mean": 0.14517686665058135, "signal/accuracy_reward/group_std_mean": 0.19360876083374023, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07258843332529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07258843332529068, "signal/advantage_abs_mean": 0.0818178191781044, "signal/advantage_pre_scale_abs_mean": 0.0818178191781044, "signal/advantage_pre_scale_std": 0.1523301661014557, "signal/advantage_std": 0.1523301661014557, "signal/brier_reward/centered_abs_mean": 0.1120417907834053, "signal/brier_reward/group_std_mean": 0.14704422652721405, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014005223847925663, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014005223847925663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03881465494632721, "signal/confidence_uniqueness_reward/group_std_mean": 0.05692438259720802, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004851831868290901, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004851831868290901, "signal/format_reward/centered_abs_mean": 0.014360894355922938, "signal/format_reward/group_std_mean": 0.027210034802556037, "signal/format_reward/group_zero_std_frac": 0.8888888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007180447177961469, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007180447177961469, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026562759652733804, "signal/frontier_aurc_reward/group_std_mean": 0.005001515662297606, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.150431195739657e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.150431195739657e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.12333370298147202, "signal/frontier_coverage_0/group_std_mean": 0.16834968626499175, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019270891090855003, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019270891090855003, "signal/frontier_coverage_1/centered_abs_mean": 0.12333370298147202, "signal/frontier_coverage_1/group_std_mean": 0.16834968626499175, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019270891090855003, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019270891090855003, "signal/frontier_coverage_10/centered_abs_mean": 0.11967587620019912, "signal/frontier_coverage_10/group_std_mean": 0.1636903315782547, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018699355656281113, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018699355656281113, "signal/frontier_coverage_15/centered_abs_mean": 0.06968192905187606, "signal/frontier_coverage_15/group_std_mean": 0.09741113483905792, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010887801414355635, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010887801414355635, "signal/frontier_coverage_20/centered_abs_mean": 0.04491528794169426, "signal/frontier_coverage_20/group_std_mean": 0.060000843554735186, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007018013740889729, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007018013740889729, "signal/frontier_coverage_25/centered_abs_mean": 0.057969672977924346, "signal/frontier_coverage_25/group_std_mean": 0.0744464322924614, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009057761402800679, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009057761402800679, "signal/frontier_coverage_5/centered_abs_mean": 0.12333370298147202, "signal/frontier_coverage_5/group_std_mean": 0.16834968626499175, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019270891090855003, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019270891090855003, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0032607629895210267, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004654625337570906, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00040759537369012834, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00040759537369012834, "step": 205 }, { "calibration/aurc": 0.1370533125922073, "calibration/batch_distribution_entropy": 0.7072075783416373, "calibration/batch_entropy_100bins": 0.7965424913173278, "calibration/batch_entropy_10bins": 0.7072075783416373, "calibration/batch_entropy_50bins": 0.7991626918689265, "calibration/batch_uniqueness": 0.9300988239372975, "calibration/buffer_distribution_entropy": 0.7932410370024624, "calibration/buffer_entropy_100bins": 0.8591231164142057, "calibration/buffer_entropy_10bins": 0.7932410370024624, "calibration/buffer_entropy_50bins": 0.8632449929488756, "calibration/confidence_entropy": 0.5134386595908284, "calibration/coverage@0%": 0.007853439020572171, "calibration/coverage@1%": 0.007853439020572171, "calibration/coverage@10%": 0.21872482437993335, "calibration/coverage@15%": 0.7426824379239719, "calibration/coverage@20%": 0.8840740183815656, "calibration/coverage@25%": 0.9695113893711355, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.04005535372901429, "calibration/distribution_entropy_10": 0.7072075783416373, "calibration/distribution_entropy_100": 0.7965424913173278, "calibration/ece": 0.07591815575628273, "calibration/mean_confidence": 0.7097033007115973, "calibration/unique_confidence_per_question": 0.18229166666666666, "calibration/unique_confidences": 70.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005353009259259227, "completions/max_length": 3599.0, "completions/max_terminated_length": 3599.0, "completions/mean_length": 694.3878784179688, "completions/mean_terminated_length": 698.0981852213541, "completions/min_length": 0.0, "completions/min_terminated_length": 190.66666666666666, "epoch": 0.49919376007799904, "num_tokens": 446538119.0, "reward": 1.0708950360616047, "reward_std": 0.11581993599732716, "rewards/accuracy_reward": 0.7042824029922485, "rewards/brier_reward": 0.8196952740351359, "rewards/confidence_uniqueness_reward": 0.9326375126838684, "rewards/format_reward": 0.9945023059844971, "rewards/frontier_aurc_reward": -0.002047328627668321, "rewards/frontier_coverage_0": 0.012841465882956982, "rewards/frontier_coverage_1": 0.012841465882956982, "rewards/frontier_coverage_10": 0.012699058279395103, "rewards/frontier_coverage_15": 0.017329357874890167, "rewards/frontier_coverage_20": 0.0331996213644743, "rewards/frontier_coverage_25": 0.0871302808324496, "rewards/frontier_coverage_5": 0.012841465882956982, "rewards/true_frontier_ece_gap_only_reward": -0.00366590932632486, "signal/accuracy_reward/centered_abs_mean": 0.15892650187015533, "signal/accuracy_reward/group_std_mean": 0.20545404652754465, "signal/accuracy_reward/group_zero_std_frac": 0.43981483578681946, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07946325093507767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07946325093507767, "signal/advantage_abs_mean": 0.08434396361311276, "signal/advantage_pre_scale_abs_mean": 0.08434396361311276, "signal/advantage_pre_scale_std": 0.15035154422124228, "signal/advantage_std": 0.15035154422124228, "signal/brier_reward/centered_abs_mean": 0.11334347476561864, "signal/brier_reward/group_std_mean": 0.14777959883213043, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01416793434570233, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01416793434570233, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03333321896692117, "signal/confidence_uniqueness_reward/group_std_mean": 0.051341903706391655, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004166652370865147, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004166652370865147, "signal/format_reward/centered_abs_mean": 0.010308159670482079, "signal/format_reward/group_std_mean": 0.02368570367495219, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005154079835241039, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005154079835241039, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027586812308679023, "signal/frontier_aurc_reward/group_std_mean": 0.0049862076217929525, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.310439423231097e-05, "signal/frontier_aurc_reward/weight": 0.015625, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.310439423231097e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1345667690038681, "signal/frontier_coverage_0/group_std_mean": 0.18024377524852753, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002102605765685439, "signal/frontier_coverage_0/weight": 0.015625, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002102605765685439, "signal/frontier_coverage_1/centered_abs_mean": 0.1345667690038681, "signal/frontier_coverage_1/group_std_mean": 0.18024377524852753, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002102605765685439, "signal/frontier_coverage_1/weight": 0.015625, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002102605765685439, "signal/frontier_coverage_10/centered_abs_mean": 0.12825309236844382, "signal/frontier_coverage_10/group_std_mean": 0.17192438741525015, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020039545682569346, "signal/frontier_coverage_10/weight": 0.015625, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020039545682569346, "signal/frontier_coverage_15/centered_abs_mean": 0.07049262523651123, "signal/frontier_coverage_15/group_std_mean": 0.09754702945550282, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001101447269320488, "signal/frontier_coverage_15/weight": 0.015625, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001101447269320488, "signal/frontier_coverage_20/centered_abs_mean": 0.04566365604599317, "signal/frontier_coverage_20/group_std_mean": 0.06064340099692345, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007134946257186433, "signal/frontier_coverage_20/weight": 0.015625, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007134946257186433, "signal/frontier_coverage_25/centered_abs_mean": 0.05877576395869255, "signal/frontier_coverage_25/group_std_mean": 0.07547732442617416, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009183713118545711, "signal/frontier_coverage_25/weight": 0.015625, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009183713118545711, "signal/frontier_coverage_5/centered_abs_mean": 0.1345667690038681, "signal/frontier_coverage_5/group_std_mean": 0.18024377524852753, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002102605765685439, "signal/frontier_coverage_5/weight": 0.015625, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002102605765685439, "signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0035362955338011184, "signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0049490658566355705, "signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0, "signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004420369417251398, "signal/true_frontier_ece_gap_only_reward/weight": 0.125, "signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004420369417251398, "step": 208, "total_flos": 0.0, "train_loss": -0.00872318486038309, "train_runtime": 40755.4175, "train_samples_per_second": 0.368, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 446538119, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }