Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC 56504f4f75 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-gapece-cold-math
Source: Original Platform
2026-05-09 19:32:44 +08:00

6892 lines
436 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.4755090430338697,
"calibration/batch_distribution_entropy": 0.26199859861521857,
"calibration/batch_entropy_100bins": 0.3438930495423692,
"calibration/batch_entropy_10bins": 0.26199859861521857,
"calibration/batch_entropy_50bins": 0.3997214906203269,
"calibration/batch_uniqueness": 0.4832166822381069,
"calibration/confidence_entropy": 0.20919231184298712,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.26199859861521857,
"calibration/distribution_entropy_100": 0.3438930495423692,
"calibration/ece": 0.4549087624937119,
"calibration/mean_confidence": 0.9209537398939647,
"calibration/unique_confidence_per_question": 0.03177083333333333,
"calibration/unique_confidences": 12.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.019357638888888907,
"completions/max_length": 3991.8,
"completions/max_terminated_length": 3991.8,
"completions/mean_length": 515.6087646484375,
"completions/mean_terminated_length": 525.7942260742187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.004724407568573952,
"learning_rate": 5.952380952380953e-07,
"loss": 0.0065,
"num_tokens": 9054021.0,
"reward": 0.5780223369598388,
"reward_std": 0.5210743069648742,
"rewards/accuracy_reward": 0.26449652314186095,
"rewards/brier_reward": 0.3152239501476288,
"rewards/confidence_uniqueness_reward": 0.2885810971260071,
"rewards/format_reward": 0.6014756917953491,
"rewards/frontier_aurc_reward": 0.27824242115020753,
"rewards/frontier_coverage_0": 0.27824242115020753,
"rewards/frontier_coverage_1": 0.27824242115020753,
"rewards/frontier_coverage_10": 0.27824242115020753,
"rewards/frontier_coverage_15": 0.27824242115020753,
"rewards/frontier_coverage_20": 0.27824242115020753,
"rewards/frontier_coverage_25": 0.27824242115020753,
"rewards/frontier_coverage_5": 0.27824242115020753,
"rewards/true_frontier_ece_gap_only_reward": 0.27824242115020753,
"signal/accuracy_reward/centered_abs_mean": 0.31138780117034914,
"signal/accuracy_reward/group_std_mean": 0.37181236147880553,
"signal/accuracy_reward/group_zero_std_frac": 0.08055555745959282,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15569390058517457,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15569390058517457,
"signal/advantage_abs_mean": 0.4485911726951599,
"signal/advantage_pre_scale_abs_mean": 0.4485911726951599,
"signal/advantage_pre_scale_std": 0.5264933466911316,
"signal/advantage_std": 0.5264933466911316,
"signal/brier_reward/centered_abs_mean": 0.3207183539867401,
"signal/brier_reward/group_std_mean": 0.37424429655075075,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04008979424834251,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04008979424834251,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23610488772392274,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2880967080593109,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029513110965490343,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029513110965490343,
"signal/format_reward/centered_abs_mean": 0.43846028447151186,
"signal/format_reward/group_std_mean": 0.4738844096660614,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21923014223575593,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.21923014223575593,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3114172875881195,
"signal/frontier_aurc_reward/group_std_mean": 0.36980949640274047,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_0/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_0/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_1/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_1/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_10/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_10/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_15/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_15/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_20/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_20/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_25/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_25/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_5/centered_abs_mean": 0.3114172875881195,
"signal/frontier_coverage_5/group_std_mean": 0.36980949640274047,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004865895118564367,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004865895118564367,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.3114172875881195,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.36980949640274047,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.038927160948514936,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.038927160948514936,
"step": 5
},
{
"calibration/aurc": 0.5159006411390681,
"calibration/batch_distribution_entropy": 0.23845090979417666,
"calibration/batch_entropy_100bins": 0.33576880006525267,
"calibration/batch_entropy_10bins": 0.23845090979417666,
"calibration/batch_entropy_50bins": 0.3873401847509245,
"calibration/batch_uniqueness": 0.4823728144800886,
"calibration/confidence_entropy": 0.21192807755010623,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.23845090979417666,
"calibration/distribution_entropy_100": 0.33576880006525267,
"calibration/ece": 0.48419175646218493,
"calibration/mean_confidence": 0.9245204458265471,
"calibration/unique_confidence_per_question": 0.03072916666666666,
"calibration/unique_confidences": 11.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018750000000000024,
"completions/max_length": 4070.8,
"completions/max_terminated_length": 4070.8,
"completions/mean_length": 476.8085998535156,
"completions/mean_terminated_length": 486.0776733398437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 19.8,
"epoch": 0.023999700003749954,
"grad_norm": 0.06719768047332764,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.0027,
"num_tokens": 17629576.0,
"reward": 0.6722566485404968,
"reward_std": 0.48708855509758,
"rewards/accuracy_reward": 0.29557291269302366,
"rewards/brier_reward": 0.35851759910583497,
"rewards/confidence_uniqueness_reward": 0.3507663607597351,
"rewards/format_reward": 0.7157118082046509,
"rewards/frontier_aurc_reward": 0.3118152379989624,
"rewards/frontier_coverage_0": 0.3118152379989624,
"rewards/frontier_coverage_1": 0.3118152379989624,
"rewards/frontier_coverage_10": 0.3118152379989624,
"rewards/frontier_coverage_15": 0.3118152379989624,
"rewards/frontier_coverage_20": 0.3118152379989624,
"rewards/frontier_coverage_25": 0.3118152379989624,
"rewards/frontier_coverage_5": 0.3118152379989624,
"rewards/true_frontier_ece_gap_only_reward": 0.3118152379989624,
"signal/accuracy_reward/centered_abs_mean": 0.3207736611366272,
"signal/accuracy_reward/group_std_mean": 0.37864009737968446,
"signal/accuracy_reward/group_zero_std_frac": 0.08333333507180214,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1603868305683136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1603868305683136,
"signal/advantage_abs_mean": 0.4065045177936554,
"signal/advantage_pre_scale_abs_mean": 0.4065045177936554,
"signal/advantage_pre_scale_std": 0.49197044372558596,
"signal/advantage_std": 0.49197044372558596,
"signal/brier_reward/centered_abs_mean": 0.31853480339050294,
"signal/brier_reward/group_std_mean": 0.37187020778656005,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03981685042381287,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03981685042381287,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21813510358333588,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2754356682300568,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027266887947916985,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027266887947916985,
"signal/format_reward/centered_abs_mean": 0.353564453125,
"signal/format_reward/group_std_mean": 0.41884335279464724,
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1767822265625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1767822265625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3167228579521179,
"signal/frontier_aurc_reward/group_std_mean": 0.37375251650810243,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_0/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_0/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_1/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_1/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_10/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_10/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_15/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_15/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_20/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_20/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_25/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_25/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_5/centered_abs_mean": 0.3167228579521179,
"signal/frontier_coverage_5/group_std_mean": 0.37375251650810243,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004948794655501842,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004948794655501842,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.3167228579521179,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.37375251650810243,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.03959035724401474,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.03959035724401474,
"step": 10
},
{
"calibration/aurc": 0.5224229909940956,
"calibration/batch_distribution_entropy": 0.2877403602460552,
"calibration/batch_entropy_100bins": 0.3573186157704617,
"calibration/batch_entropy_10bins": 0.2877403602460552,
"calibration/batch_entropy_50bins": 0.41523268744576436,
"calibration/batch_uniqueness": 0.5167436160191917,
"calibration/confidence_entropy": 0.23103006074957716,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.2877403602460552,
"calibration/distribution_entropy_100": 0.3573186157704617,
"calibration/ece": 0.4970179613664881,
"calibration/mean_confidence": 0.9129537058033463,
"calibration/unique_confidence_per_question": 0.036979166666666674,
"calibration/unique_confidences": 14.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010850694444444442,
"completions/max_length": 3950.0,
"completions/max_terminated_length": 3950.0,
"completions/mean_length": 433.64210815429686,
"completions/mean_terminated_length": 438.44232177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 59.0,
"epoch": 0.03599955000562493,
"grad_norm": 0.0016660373657941818,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0113,
"num_tokens": 25727117.0,
"reward": 0.8379699349403381,
"reward_std": 0.3784303069114685,
"rewards/accuracy_reward": 0.32907986640930176,
"rewards/brier_reward": 0.431581848859787,
"rewards/confidence_uniqueness_reward": 0.5059985220432281,
"rewards/format_reward": 0.93359375,
"rewards/frontier_aurc_reward": 0.3577423691749573,
"rewards/frontier_coverage_0": 0.3577423691749573,
"rewards/frontier_coverage_1": 0.3577423691749573,
"rewards/frontier_coverage_10": 0.3577423691749573,
"rewards/frontier_coverage_15": 0.3577423691749573,
"rewards/frontier_coverage_20": 0.3577423691749573,
"rewards/frontier_coverage_25": 0.3577423691749573,
"rewards/frontier_coverage_5": 0.3577423691749573,
"rewards/true_frontier_ece_gap_only_reward": 0.3577423691749573,
"signal/accuracy_reward/centered_abs_mean": 0.3203721702098846,
"signal/accuracy_reward/group_std_mean": 0.37649917006492617,
"signal/accuracy_reward/group_zero_std_frac": 0.09722222462296486,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1601860851049423,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1601860851049423,
"signal/advantage_abs_mean": 0.30626789927482606,
"signal/advantage_pre_scale_abs_mean": 0.30626789927482606,
"signal/advantage_pre_scale_std": 0.388842511177063,
"signal/advantage_std": 0.388842511177063,
"signal/brier_reward/centered_abs_mean": 0.30102636218070983,
"signal/brier_reward/group_std_mean": 0.3518189787864685,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03762829527258873,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03762829527258873,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17830342054367065,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2288795828819275,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022287927567958832,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022287927567958832,
"signal/format_reward/centered_abs_mean": 0.1138617604970932,
"signal/format_reward/group_std_mean": 0.19477857500314713,
"signal/format_reward/group_zero_std_frac": 0.2944444492459297,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0569308802485466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0569308802485466,
"signal/frontier_aurc_reward/centered_abs_mean": 0.31336275935173036,
"signal/frontier_aurc_reward/group_std_mean": 0.3662181556224823,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_0/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_0/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_1/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_1/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_10/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_10/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_15/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_15/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_20/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_20/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_25/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_25/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_5/centered_abs_mean": 0.31336275935173036,
"signal/frontier_coverage_5/group_std_mean": 0.3662181556224823,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004896293114870787,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004896293114870787,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.31336275935173036,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.3662181556224823,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.039170344918966295,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.039170344918966295,
"step": 15
},
{
"calibration/aurc": 0.44171379177515535,
"calibration/batch_distribution_entropy": 0.3765147360973443,
"calibration/batch_entropy_100bins": 0.39381071707061527,
"calibration/batch_entropy_10bins": 0.3765147360973443,
"calibration/batch_entropy_50bins": 0.4577038412907034,
"calibration/batch_uniqueness": 0.5961709299135531,
"calibration/buffer_distribution_entropy": 0.29230688761468687,
"calibration/buffer_entropy_100bins": 0.36493243936626785,
"calibration/buffer_entropy_10bins": 0.29230688761468687,
"calibration/buffer_entropy_50bins": 0.42293687132466956,
"calibration/confidence_entropy": 0.2898413619535891,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.034031413612565446,
"calibration/coverage@30%": 0.07905759162303665,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.3765147360973443,
"calibration/distribution_entropy_100": 0.39381071707061527,
"calibration/ece": 0.3790869053038728,
"calibration/mean_confidence": 0.8925066797565309,
"calibration/unique_confidence_per_question": 0.035416666666666666,
"calibration/unique_confidences": 13.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010763888888888884,
"completions/max_length": 3739.4,
"completions/max_terminated_length": 3739.4,
"completions/mean_length": 471.2155456542969,
"completions/mean_terminated_length": 476.41375732421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 91.8,
"epoch": 0.04799940000749991,
"grad_norm": 0.0008532739011570811,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0093,
"num_tokens": 34269216.0,
"reward": 0.8856567025184632,
"reward_std": 0.2739575058221817,
"rewards/accuracy_reward": 0.4450520873069763,
"rewards/brier_reward": 0.5553683876991272,
"rewards/confidence_uniqueness_reward": 0.5914790272712708,
"rewards/format_reward": 0.9831597328186035,
"rewards/frontier_aurc_reward": 0.1789298068732023,
"rewards/frontier_coverage_0": 0.18989355927333235,
"rewards/frontier_coverage_1": 0.18989355927333235,
"rewards/frontier_coverage_10": 0.18989355927333235,
"rewards/frontier_coverage_15": 0.18989355927333235,
"rewards/frontier_coverage_20": 0.18989355927333235,
"rewards/frontier_coverage_25": 0.18989355927333235,
"rewards/frontier_coverage_5": 0.18989355927333235,
"rewards/true_frontier_ece_gap_only_reward": 0.03703599572181702,
"signal/accuracy_reward/centered_abs_mean": 0.2997667074203491,
"signal/accuracy_reward/group_std_mean": 0.36736690402030947,
"signal/accuracy_reward/group_zero_std_frac": 0.06944444626569748,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14988335371017455,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14988335371017455,
"signal/advantage_abs_mean": 0.21930149793624878,
"signal/advantage_pre_scale_abs_mean": 0.21930149793624878,
"signal/advantage_pre_scale_std": 0.28236431181430816,
"signal/advantage_std": 0.28236431181430816,
"signal/brier_reward/centered_abs_mean": 0.2637813687324524,
"signal/brier_reward/group_std_mean": 0.3207400619983673,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03297267109155655,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03297267109155655,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1622892886400223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.19781720638275146,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020286161080002786,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020286161080002786,
"signal/format_reward/centered_abs_mean": 0.03038194477558136,
"signal/format_reward/group_std_mean": 0.06310995742678642,
"signal/format_reward/group_zero_std_frac": 0.7222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01519097238779068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01519097238779068,
"signal/frontier_aurc_reward/centered_abs_mean": 0.11971323965117335,
"signal/frontier_aurc_reward/group_std_mean": 0.14516795333474874,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0018705193695495836,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0018705193695495836,
"signal/frontier_coverage_0/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_0/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_1/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_1/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_10/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_10/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_15/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_15/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_20/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_20/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_25/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_25/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_5/centered_abs_mean": 0.13567787148058413,
"signal/frontier_coverage_5/group_std_mean": 0.1734710790216923,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002119966741884127,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002119966741884127,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.1349403366446495,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.16668230146169663,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.016867542080581187,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.016867542080581187,
"step": 20
},
{
"calibration/aurc": 0.3429455132381247,
"calibration/batch_distribution_entropy": 0.507851007126862,
"calibration/batch_entropy_100bins": 0.44183174453138746,
"calibration/batch_entropy_10bins": 0.507851007126862,
"calibration/batch_entropy_50bins": 0.514838438261066,
"calibration/batch_uniqueness": 0.6844458142688816,
"calibration/buffer_distribution_entropy": 0.3345262025303308,
"calibration/buffer_entropy_100bins": 0.3853994626063969,
"calibration/buffer_entropy_10bins": 0.3345262025303308,
"calibration/buffer_entropy_50bins": 0.44640286826484654,
"calibration/confidence_entropy": 0.34430819143240965,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.05654450261780105,
"calibration/coverage@20%": 0.09424083769633508,
"calibration/coverage@25%": 0.22486700447763291,
"calibration/coverage@30%": 0.43646112600536197,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.507851007126862,
"calibration/distribution_entropy_100": 0.44183174453138746,
"calibration/ece": 0.26461993450442634,
"calibration/mean_confidence": 0.8628734329706985,
"calibration/unique_confidence_per_question": 0.0421875,
"calibration/unique_confidences": 16.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009895833333333348,
"completions/max_length": 4003.6,
"completions/max_terminated_length": 4003.6,
"completions/mean_length": 524.5051208496094,
"completions/mean_terminated_length": 529.7198486328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.6,
"epoch": 0.05999925000937488,
"grad_norm": 0.001525247236713767,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0065,
"num_tokens": 43435963.0,
"reward": 0.9095749855041504,
"reward_std": 0.21235645115375518,
"rewards/accuracy_reward": 0.5509548485279083,
"rewards/brier_reward": 0.6560544490814209,
"rewards/confidence_uniqueness_reward": 0.6728395104408265,
"rewards/format_reward": 0.9865451335906983,
"rewards/frontier_aurc_reward": -0.004245653562247753,
"rewards/frontier_coverage_0": 0.003410888835787773,
"rewards/frontier_coverage_1": 0.003410888835787773,
"rewards/frontier_coverage_10": 0.003410888835787773,
"rewards/frontier_coverage_15": 0.003410888835787773,
"rewards/frontier_coverage_20": 0.003410888835787773,
"rewards/frontier_coverage_25": 0.003410888835787773,
"rewards/frontier_coverage_5": 0.003410888835787773,
"rewards/true_frontier_ece_gap_only_reward": -0.20474808514118195,
"signal/accuracy_reward/centered_abs_mean": 0.26726887822151185,
"signal/accuracy_reward/group_std_mean": 0.333760267496109,
"signal/accuracy_reward/group_zero_std_frac": 0.12777777910232543,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13363443911075593,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13363443911075593,
"signal/advantage_abs_mean": 0.1648347020149231,
"signal/advantage_pre_scale_abs_mean": 0.1648347020149231,
"signal/advantage_pre_scale_std": 0.22817236185073853,
"signal/advantage_std": 0.22817236185073853,
"signal/brier_reward/centered_abs_mean": 0.21432596445083618,
"signal/brier_reward/group_std_mean": 0.2680306822061539,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026790745556354523,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.026790745556354523,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12359119206666946,
"signal/confidence_uniqueness_reward/group_std_mean": 0.15292936861515044,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015448899008333683,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015448899008333683,
"signal/format_reward/centered_abs_mean": 0.024397786147892474,
"signal/format_reward/group_std_mean": 0.051703880354762075,
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012198893073946237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012198893073946237,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031622422859072684,
"signal/frontier_aurc_reward/group_std_mean": 0.004759848862886429,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.941003571730107e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.941003571730107e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_0/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_1/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_1/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_10/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_10/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_15/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_15/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_20/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_20/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_25/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_25/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_5/centered_abs_mean": 0.03383462205529213,
"signal/frontier_coverage_5/group_std_mean": 0.0554510623216629,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005286659696139395,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005286659696139395,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.03566240519285202,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04645907133817673,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0044578006491065025,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0044578006491065025,
"step": 25
},
{
"calibration/aurc": 0.3084429936468077,
"calibration/batch_distribution_entropy": 0.6357372712721332,
"calibration/batch_entropy_100bins": 0.4579116256178331,
"calibration/batch_entropy_10bins": 0.6357372712721332,
"calibration/batch_entropy_50bins": 0.5364450128476048,
"calibration/batch_uniqueness": 0.7160179009317768,
"calibration/buffer_distribution_entropy": 0.4059973934246511,
"calibration/buffer_entropy_100bins": 0.4151252075471626,
"calibration/buffer_entropy_10bins": 0.4059973934246511,
"calibration/buffer_entropy_50bins": 0.4819283775367138,
"calibration/confidence_entropy": 0.46384456781483224,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.005759162303664921,
"calibration/coverage@15%": 0.04192937506962237,
"calibration/coverage@20%": 0.04298485017266347,
"calibration/coverage@25%": 0.14497929495647485,
"calibration/coverage@30%": 0.48877619011061546,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.6357372712721332,
"calibration/distribution_entropy_100": 0.4579116256178331,
"calibration/ece": 0.1707801967802031,
"calibration/mean_confidence": 0.7917407206813284,
"calibration/unique_confidence_per_question": 0.0359375,
"calibration/unique_confidences": 13.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015711805555555534,
"completions/max_length": 4078.4,
"completions/max_terminated_length": 4078.4,
"completions/mean_length": 598.9315185546875,
"completions/mean_terminated_length": 608.5085327148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.6,
"epoch": 0.07199910001124986,
"grad_norm": 0.0004900748026557267,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.0094,
"num_tokens": 53445574.0,
"reward": 0.9443552374839783,
"reward_std": 0.1934140741825104,
"rewards/accuracy_reward": 0.5914930582046509,
"rewards/brier_reward": 0.7071029067039489,
"rewards/confidence_uniqueness_reward": 0.6907591581344604,
"rewards/format_reward": 0.9809895753860474,
"rewards/frontier_aurc_reward": -0.0032980738673359157,
"rewards/frontier_coverage_0": -0.006392185157164931,
"rewards/frontier_coverage_1": -0.006392185157164931,
"rewards/frontier_coverage_10": -0.006392185157164931,
"rewards/frontier_coverage_15": -0.006392185157164931,
"rewards/frontier_coverage_20": -0.006392185157164931,
"rewards/frontier_coverage_25": -0.006392185157164931,
"rewards/frontier_coverage_5": -0.006392185157164931,
"rewards/true_frontier_ece_gap_only_reward": -0.1269455760717392,
"signal/accuracy_reward/centered_abs_mean": 0.23853081464767456,
"signal/accuracy_reward/group_std_mean": 0.2985024094581604,
"signal/accuracy_reward/group_zero_std_frac": 0.21111111491918563,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11926540732383728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11926540732383728,
"signal/advantage_abs_mean": 0.14833838045597075,
"signal/advantage_pre_scale_abs_mean": 0.14833838045597075,
"signal/advantage_pre_scale_std": 0.21972199380397797,
"signal/advantage_std": 0.21972199380397797,
"signal/brier_reward/centered_abs_mean": 0.17149352431297302,
"signal/brier_reward/group_std_mean": 0.21699302196502684,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021436690539121627,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021436690539121627,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11525630950927734,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14727450013160706,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014407038688659668,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014407038688659668,
"signal/format_reward/centered_abs_mean": 0.03138563372194767,
"signal/format_reward/group_std_mean": 0.06032953634858131,
"signal/format_reward/group_zero_std_frac": 0.7444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015692816860973836,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015692816860973836,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001896983222104609,
"signal/frontier_aurc_reward/group_std_mean": 0.0030401549767702816,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9640362845384517e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9640362845384517e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_0/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_1/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_1/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_10/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_10/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_15/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_15/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_20/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_20/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_25/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_25/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_5/centered_abs_mean": 0.04802608713507652,
"signal/frontier_coverage_5/group_std_mean": 0.06869390532374382,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007504076114855706,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007504076114855706,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.03984055146574974,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0505749449133873,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.004980068933218717,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.004980068933218717,
"step": 30
},
{
"calibration/aurc": 0.250599098148718,
"calibration/batch_distribution_entropy": 0.6323867153697071,
"calibration/batch_entropy_100bins": 0.42821853049324937,
"calibration/batch_entropy_10bins": 0.6323867153697071,
"calibration/batch_entropy_50bins": 0.5033903372089894,
"calibration/batch_uniqueness": 0.6367832585438589,
"calibration/buffer_distribution_entropy": 0.5053599848783283,
"calibration/buffer_entropy_100bins": 0.45108017532774436,
"calibration/buffer_entropy_10bins": 0.5053599848783283,
"calibration/buffer_entropy_50bins": 0.5249452336686906,
"calibration/confidence_entropy": 0.5795754506284359,
"calibration/coverage@0%": 0.01114940403252757,
"calibration/coverage@1%": 0.01114940403252757,
"calibration/coverage@10%": 0.023336877784522418,
"calibration/coverage@15%": 0.045042137534781396,
"calibration/coverage@20%": 0.21714213102924745,
"calibration/coverage@25%": 0.5910950020422562,
"calibration/coverage@30%": 0.8375,
"calibration/coverage@5%": 0.01114940403252757,
"calibration/distribution_entropy_10": 0.6323867153697071,
"calibration/distribution_entropy_100": 0.42821853049324937,
"calibration/ece": 0.10070792051637412,
"calibration/mean_confidence": 0.6918897482432823,
"calibration/unique_confidence_per_question": 0.0375,
"calibration/unique_confidences": 14.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017534722222222233,
"completions/max_length": 4016.6,
"completions/max_terminated_length": 4016.6,
"completions/mean_length": 656.1796997070312,
"completions/mean_terminated_length": 667.9824096679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.6,
"epoch": 0.08399895001312484,
"grad_norm": 0.0004635561490431428,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0114,
"num_tokens": 64082204.0,
"reward": 0.9670976400375366,
"reward_std": 0.17328265607357024,
"rewards/accuracy_reward": 0.6295138835906983,
"rewards/brier_reward": 0.7479526400566101,
"rewards/confidence_uniqueness_reward": 0.6298671245574952,
"rewards/format_reward": 0.9805555582046509,
"rewards/frontier_aurc_reward": -0.002723962301388383,
"rewards/frontier_coverage_0": -0.023209616425447166,
"rewards/frontier_coverage_1": -0.023209616425447166,
"rewards/frontier_coverage_10": -0.023209616425447166,
"rewards/frontier_coverage_15": -0.023209616425447166,
"rewards/frontier_coverage_20": -0.023209616425447166,
"rewards/frontier_coverage_25": -0.023209616425447166,
"rewards/frontier_coverage_5": -0.023209616425447166,
"rewards/true_frontier_ece_gap_only_reward": -0.060667777061462404,
"signal/accuracy_reward/centered_abs_mean": 0.21026475727558136,
"signal/accuracy_reward/group_std_mean": 0.26806623935699464,
"signal/accuracy_reward/group_zero_std_frac": 0.2722222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10513237863779068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10513237863779068,
"signal/advantage_abs_mean": 0.13171991258859633,
"signal/advantage_pre_scale_abs_mean": 0.13171991258859633,
"signal/advantage_pre_scale_std": 0.19843848645687104,
"signal/advantage_std": 0.19843848645687104,
"signal/brier_reward/centered_abs_mean": 0.12574937492609023,
"signal/brier_reward/group_std_mean": 0.16387327909469604,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01571867186576128,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01571867186576128,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17439252138137817,
"signal/confidence_uniqueness_reward/group_std_mean": 0.20724063515663146,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02179906517267227,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02179906517267227,
"signal/format_reward/centered_abs_mean": 0.03100043386220932,
"signal/format_reward/group_std_mean": 0.055044320225715634,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01550021693110466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01550021693110466,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0009950165753252805,
"signal/frontier_aurc_reward/group_std_mean": 0.001629676064476371,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5547133989457508e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5547133989457508e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_0/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_1/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_1/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_10/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_10/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_15/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_15/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_20/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_20/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_25/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_25/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_5/centered_abs_mean": 0.07657658159732819,
"signal/frontier_coverage_5/group_std_mean": 0.09882448017597198,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001196509087458253,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001196509087458253,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.030044597759842872,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04015489742159843,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003755574719980359,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003755574719980359,
"step": 35
},
{
"calibration/aurc": 0.33153256701336514,
"calibration/batch_distribution_entropy": 0.5854273557012318,
"calibration/batch_entropy_100bins": 0.4253124440379624,
"calibration/batch_entropy_10bins": 0.5854273557012318,
"calibration/batch_entropy_50bins": 0.4994252045234801,
"calibration/batch_uniqueness": 0.6444065784609718,
"calibration/buffer_distribution_entropy": 0.5848560770733751,
"calibration/buffer_entropy_100bins": 0.4806630298602917,
"calibration/buffer_entropy_10bins": 0.5848560770733751,
"calibration/buffer_entropy_50bins": 0.5604947004501042,
"calibration/confidence_entropy": 0.6171869593013805,
"calibration/coverage@0%": 0.006288407488631675,
"calibration/coverage@1%": 0.006288407488631675,
"calibration/coverage@10%": 0.006288407488631675,
"calibration/coverage@15%": 0.020387624198814444,
"calibration/coverage@20%": 0.020387624198814444,
"calibration/coverage@25%": 0.22147370367411084,
"calibration/coverage@30%": 0.2721291791843428,
"calibration/coverage@5%": 0.006288407488631675,
"calibration/distribution_entropy_10": 0.5854273557012318,
"calibration/distribution_entropy_100": 0.4253124440379624,
"calibration/ece": 0.09023499731095522,
"calibration/mean_confidence": 0.6499715924246285,
"calibration/unique_confidence_per_question": 0.03958333333333333,
"calibration/unique_confidences": 15.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012760416666666653,
"completions/max_length": 3917.2,
"completions/max_terminated_length": 3917.2,
"completions/mean_length": 703.5712768554688,
"completions/mean_terminated_length": 712.6527587890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.09599880001499982,
"grad_norm": 0.0004154318303335458,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0103,
"num_tokens": 75306865.0,
"reward": 0.9813725113868713,
"reward_std": 0.15861513316631318,
"rewards/accuracy_reward": 0.6460069417953491,
"rewards/brier_reward": 0.7578154802322388,
"rewards/confidence_uniqueness_reward": 0.629870867729187,
"rewards/format_reward": 0.985156238079071,
"rewards/frontier_aurc_reward": -0.002467139856889844,
"rewards/frontier_coverage_0": -0.03592981658875942,
"rewards/frontier_coverage_1": -0.03592981658875942,
"rewards/frontier_coverage_10": -0.03592981658875942,
"rewards/frontier_coverage_15": -0.03592981658875942,
"rewards/frontier_coverage_20": -0.03592981658875942,
"rewards/frontier_coverage_25": -0.03592981658875942,
"rewards/frontier_coverage_5": -0.03592981658875942,
"rewards/true_frontier_ece_gap_only_reward": -0.029612084105610847,
"signal/accuracy_reward/centered_abs_mean": 0.19372829794883728,
"signal/accuracy_reward/group_std_mean": 0.2543206661939621,
"signal/accuracy_reward/group_zero_std_frac": 0.286111119389534,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09686414897441864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09686414897441864,
"signal/advantage_abs_mean": 0.11751253008842469,
"signal/advantage_pre_scale_abs_mean": 0.11751253008842469,
"signal/advantage_pre_scale_std": 0.186165389418602,
"signal/advantage_std": 0.186165389418602,
"signal/brier_reward/centered_abs_mean": 0.11083936840295791,
"signal/brier_reward/group_std_mean": 0.14510888755321502,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013854921050369739,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013854921050369739,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1665905848145485,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1974548101425171,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020823823101818562,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020823823101818562,
"signal/format_reward/centered_abs_mean": 0.02540690116584301,
"signal/format_reward/group_std_mean": 0.04733345359563827,
"signal/format_reward/group_zero_std_frac": 0.8055555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012703450582921504,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012703450582921504,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0008696626755408943,
"signal/frontier_aurc_reward/group_std_mean": 0.0013225122122094036,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3588479305326473e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3588479305326473e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_0/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_1/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_1/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_10/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_10/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_15/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_15/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_20/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_20/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_25/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_25/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_5/centered_abs_mean": 0.09154021292924881,
"signal/frontier_coverage_5/group_std_mean": 0.11824491173028946,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014303158270195127,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014303158270195127,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.02095247954130173,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.03012901544570923,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.002619059942662716,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.002619059942662716,
"step": 40
},
{
"calibration/aurc": 0.20306210885118028,
"calibration/batch_distribution_entropy": 0.6887549737306013,
"calibration/batch_entropy_100bins": 0.4745983011730009,
"calibration/batch_entropy_10bins": 0.6887549737306013,
"calibration/batch_entropy_50bins": 0.5543744512155419,
"calibration/batch_uniqueness": 0.6833862362276909,
"calibration/buffer_distribution_entropy": 0.6364236887118315,
"calibration/buffer_entropy_100bins": 0.5046055763785582,
"calibration/buffer_entropy_10bins": 0.6364236887118315,
"calibration/buffer_entropy_50bins": 0.5889470060117332,
"calibration/confidence_entropy": 0.5883243712866314,
"calibration/coverage@0%": 0.014789383258954939,
"calibration/coverage@1%": 0.014789383258954939,
"calibration/coverage@10%": 0.057257313784553766,
"calibration/coverage@15%": 0.1596987720979517,
"calibration/coverage@20%": 0.4277598387998176,
"calibration/coverage@25%": 0.8117097398897354,
"calibration/coverage@30%": 0.9646739130434783,
"calibration/coverage@5%": 0.014789383258954939,
"calibration/distribution_entropy_10": 0.6887549737306013,
"calibration/distribution_entropy_100": 0.4745983011730009,
"calibration/ece": 0.07153040992310632,
"calibration/mean_confidence": 0.6690040608217023,
"calibration/unique_confidence_per_question": 0.06197916666666666,
"calibration/unique_confidences": 23.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015104166666666651,
"completions/max_length": 3774.0,
"completions/max_terminated_length": 3774.0,
"completions/mean_length": 735.8694580078125,
"completions/mean_terminated_length": 747.207177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 239.4,
"epoch": 0.1079986500168748,
"grad_norm": 0.0004037077887915075,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0114,
"num_tokens": 86919345.0,
"reward": 0.9962880134582519,
"reward_std": 0.15157434940338135,
"rewards/accuracy_reward": 0.659375,
"rewards/brier_reward": 0.7679201841354371,
"rewards/confidence_uniqueness_reward": 0.677590298652649,
"rewards/format_reward": 0.9844618201255798,
"rewards/frontier_aurc_reward": -0.002255662181414664,
"rewards/frontier_coverage_0": -0.028669605404138564,
"rewards/frontier_coverage_1": -0.028669605404138564,
"rewards/frontier_coverage_10": -0.028669605404138564,
"rewards/frontier_coverage_15": -0.028669605404138564,
"rewards/frontier_coverage_20": -0.028669605404138564,
"rewards/frontier_coverage_25": -0.028669605404138564,
"rewards/frontier_coverage_5": -0.028669605404138564,
"rewards/true_frontier_ece_gap_only_reward": -0.02518573999404907,
"signal/accuracy_reward/centered_abs_mean": 0.1809027761220932,
"signal/accuracy_reward/group_std_mean": 0.24004943072795867,
"signal/accuracy_reward/group_zero_std_frac": 0.31666667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0904513880610466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0904513880610466,
"signal/advantage_abs_mean": 0.11121969670057297,
"signal/advantage_pre_scale_abs_mean": 0.11121969670057297,
"signal/advantage_pre_scale_std": 0.1811255246400833,
"signal/advantage_std": 0.1811255246400833,
"signal/brier_reward/centered_abs_mean": 0.11833977550268174,
"signal/brier_reward/group_std_mean": 0.1528250217437744,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014792471937835217,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014792471937835217,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15492647886276245,
"signal/confidence_uniqueness_reward/group_std_mean": 0.18688772320747377,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019365809857845306,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019365809857845306,
"signal/format_reward/centered_abs_mean": 0.02518988773226738,
"signal/format_reward/group_std_mean": 0.04387797862291336,
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01259494386613369,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01259494386613369,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012202380341477693,
"signal/frontier_aurc_reward/group_std_mean": 0.001992561621591449,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9066219283558895e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9066219283558895e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_0/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_1/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_1/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_10/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_10/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_15/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_15/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_20/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_20/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_25/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_25/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_5/centered_abs_mean": 0.09129920750856399,
"signal/frontier_coverage_5/group_std_mean": 0.12056645601987839,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014265501173213123,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014265501173213123,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.021791164949536323,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.03154192678630352,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0027238956186920404,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0027238956186920404,
"step": 45
},
{
"calibration/aurc": 0.39036076960907307,
"calibration/batch_distribution_entropy": 0.7862094401826989,
"calibration/batch_entropy_100bins": 0.6110341298979247,
"calibration/batch_entropy_10bins": 0.7862094401826989,
"calibration/batch_entropy_50bins": 0.6934297857057619,
"calibration/batch_uniqueness": 0.8247381911119701,
"calibration/buffer_distribution_entropy": 0.6734957884749172,
"calibration/buffer_entropy_100bins": 0.5282027696652414,
"calibration/buffer_entropy_10bins": 0.6734957884749172,
"calibration/buffer_entropy_50bins": 0.6156049219048293,
"calibration/confidence_entropy": 0.5949417037140304,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.003655352480417755,
"calibration/coverage@20%": 0.03593048475555003,
"calibration/coverage@25%": 0.0391050879301532,
"calibration/coverage@30%": 0.2087096269843971,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.7862094401826989,
"calibration/distribution_entropy_100": 0.6110341298979247,
"calibration/ece": 0.12655827505964012,
"calibration/mean_confidence": 0.6225528034108609,
"calibration/unique_confidence_per_question": 0.10677083333333334,
"calibration/unique_confidences": 41.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666652,
"completions/max_length": 3659.4,
"completions/max_terminated_length": 3659.4,
"completions/mean_length": 737.67744140625,
"completions/mean_terminated_length": 746.5686889648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 213.2,
"epoch": 0.11999850001874976,
"grad_norm": 0.0004018676117993891,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0112,
"num_tokens": 98514989.0,
"reward": 1.000908660888672,
"reward_std": 0.14541010558605194,
"rewards/accuracy_reward": 0.6365451455116272,
"rewards/brier_reward": 0.7578566431999206,
"rewards/confidence_uniqueness_reward": 0.8036070704460144,
"rewards/format_reward": 0.9878472208976745,
"rewards/frontier_aurc_reward": -0.0020980457309633495,
"rewards/frontier_coverage_0": -0.028162200190126895,
"rewards/frontier_coverage_1": -0.028162200190126895,
"rewards/frontier_coverage_10": -0.028162200190126895,
"rewards/frontier_coverage_15": -0.028162200190126895,
"rewards/frontier_coverage_20": -0.028162200190126895,
"rewards/frontier_coverage_25": -0.028162200190126895,
"rewards/frontier_coverage_5": -0.028162200190126895,
"rewards/true_frontier_ece_gap_only_reward": -0.026859960705041885,
"signal/accuracy_reward/centered_abs_mean": 0.1797797292470932,
"signal/accuracy_reward/group_std_mean": 0.2389494448900223,
"signal/accuracy_reward/group_zero_std_frac": 0.3166666805744171,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0898898646235466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0898898646235466,
"signal/advantage_abs_mean": 0.105811907351017,
"signal/advantage_pre_scale_abs_mean": 0.105811907351017,
"signal/advantage_pre_scale_std": 0.1739170879125595,
"signal/advantage_std": 0.1739170879125595,
"signal/brier_reward/centered_abs_mean": 0.12420935779809952,
"signal/brier_reward/group_std_mean": 0.16102492213249206,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01552616972476244,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01552616972476244,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10721739381551743,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13165029883384705,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013402174226939678,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013402174226939678,
"signal/format_reward/centered_abs_mean": 0.02109375,
"signal/format_reward/group_std_mean": 0.03988752476871014,
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016509333858266474,
"signal/frontier_aurc_reward/group_std_mean": 0.0027218869887292384,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5795834153541365e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5795834153541365e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_0/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_1/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_1/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_10/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_10/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_15/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_15/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_20/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_20/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_25/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_25/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_5/centered_abs_mean": 0.11604345738887786,
"signal/frontier_coverage_5/group_std_mean": 0.1531293898820877,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018131790217012166,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018131790217012166,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.02609681598842144,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.03738295584917069,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00326210199855268,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00326210199855268,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_completions/clipped_ratio": 0.013888888888888895,
"eval_completions/max_length": 1763.6666666666667,
"eval_completions/max_terminated_length": 1763.6666666666667,
"eval_completions/mean_length": 710.3668518066406,
"eval_completions/mean_terminated_length": 720.3878885904948,
"eval_completions/min_length": 65.33333333333333,
"eval_completions/min_terminated_length": 283.0,
"eval_loss": 0.0,
"eval_num_tokens": 98514989.0,
"eval_reward": 1.0152363975842793,
"eval_reward_std": 0.25018754849831265,
"eval_rewards/accuracy_reward": 0.671875,
"eval_rewards/brier_reward": 0.7494580149650574,
"eval_rewards/confidence_uniqueness_reward": 0.8206921716531118,
"eval_rewards/format_reward": 0.984375,
"eval_rewards/frontier_aurc_reward": -0.0020469005879325173,
"eval_rewards/frontier_coverage_0": -0.05519990002115568,
"eval_rewards/frontier_coverage_1": -0.05519990002115568,
"eval_rewards/frontier_coverage_10": -0.05519990002115568,
"eval_rewards/frontier_coverage_15": -0.05519990002115568,
"eval_rewards/frontier_coverage_20": -0.05519990002115568,
"eval_rewards/frontier_coverage_25": -0.05519990002115568,
"eval_rewards/frontier_coverage_5": -0.05519990002115568,
"eval_rewards/true_frontier_ece_gap_only_reward": -0.024703877978026867,
"eval_runtime": 205.888,
"eval_samples_per_second": 4.857,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4297960052887599,
"eval_signal/accuracy_reward/group_std_mean": 0.4702196568250656,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21489800264437994,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21489800264437994,
"eval_signal/advantage_abs_mean": 0.21398618072271347,
"eval_signal/advantage_pre_scale_abs_mean": 0.21398618072271347,
"eval_signal/advantage_pre_scale_std": 0.2496974691748619,
"eval_signal/advantage_std": 0.2496974691748619,
"eval_signal/brier_reward/centered_abs_mean": 0.14907778551181158,
"eval_signal/brier_reward/group_std_mean": 0.20007833590110144,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018634723188976448,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018634723188976448,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.09062439575791359,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12857400501767793,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011328049469739199,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011328049469739199,
"eval_signal/format_reward/centered_abs_mean": 0.029947916977107525,
"eval_signal/format_reward/group_std_mean": 0.0794201207657655,
"eval_signal/format_reward/group_zero_std_frac": 0.5833333432674408,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014973958488553762,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.014973958488553762,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0015480444611360629,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.003234441547344128,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4188194705250982e-05,
"eval_signal/frontier_aurc_reward/weight": 0.015625,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4188194705250982e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_0/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_0/weight": 0.015625,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_1/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_1/weight": 0.015625,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_10/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_10/weight": 0.015625,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_15/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_15/weight": 0.015625,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_20/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_20/weight": 0.015625,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_25/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_25/weight": 0.015625,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.20009989539782205,
"eval_signal/frontier_coverage_5/group_std_mean": 0.26012368500232697,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/frontier_coverage_5/weight": 0.015625,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031265608655909696,
"eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.025939644935230415,
"eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04151128667096297,
"eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003242455616903802,
"eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003242455616903802,
"eval_steps_per_second": 0.029,
"step": 50
},
{
"epoch": 0.11999850001874976,
"step": 50,
"train_probe_completions/clipped_ratio": 0.013715277777777776,
"train_probe_completions/max_length": 2407.6666666666665,
"train_probe_completions/max_terminated_length": 2407.6666666666665,
"train_probe_completions/mean_length": 720.0277811686198,
"train_probe_completions/mean_terminated_length": 730.0629069010416,
"train_probe_completions/min_length": 0.0,
"train_probe_completions/min_terminated_length": 217.16666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 98514989.0,
"train_probe_reward": 1.0258092880249023,
"train_probe_reward_std": 0.24107951919237772,
"train_probe_rewards/accuracy_reward": 0.6814236144224802,
"train_probe_rewards/brier_reward": 0.7728658020496368,
"train_probe_rewards/confidence_uniqueness_reward": 0.825143297513326,
"train_probe_rewards/format_reward": 0.9869791666666666,
"train_probe_rewards/frontier_aurc_reward": -0.0015906431168938677,
"train_probe_rewards/frontier_coverage_0": -0.041856971802189946,
"train_probe_rewards/frontier_coverage_1": -0.041856971802189946,
"train_probe_rewards/frontier_coverage_10": -0.041856971802189946,
"train_probe_rewards/frontier_coverage_15": -0.041856971802189946,
"train_probe_rewards/frontier_coverage_20": -0.041856971802189946,
"train_probe_rewards/frontier_coverage_25": -0.041856971802189946,
"train_probe_rewards/frontier_coverage_5": -0.041856971802189946,
"train_probe_rewards/true_frontier_ece_gap_only_reward": -0.028322534635663033,
"train_probe_runtime": 203.2167,
"train_probe_samples_per_second": 4.921,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4200846354166667,
"train_probe_signal/accuracy_reward/group_std_mean": 0.46454379459222156,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21004231770833334,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.21004231770833334,
"train_probe_signal/advantage_abs_mean": 0.20514148473739624,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20514148473739624,
"train_probe_signal/advantage_pre_scale_std": 0.24087134500344595,
"train_probe_signal/advantage_std": 0.24087134500344595,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1424630656838417,
"train_probe_signal/brier_reward/group_std_mean": 0.189873273173968,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017807883210480213,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.017807883210480213,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.09026772528886795,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1251646839082241,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011283465661108494,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011283465661108494,
"train_probe_signal/format_reward/centered_abs_mean": 0.025010850746184587,
"train_probe_signal/format_reward/group_std_mean": 0.06767813768237829,
"train_probe_signal/format_reward/group_zero_std_frac": 0.6388889054457346,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425373092294,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.012505425373092294,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0015710045505935948,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0031676616442079344,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.454694610302492e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.015625,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.454694610302492e-05,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_0/weight": 0.015625,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_1/weight": 0.015625,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_10/weight": 0.015625,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_15/weight": 0.015625,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_20/weight": 0.015625,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_25/weight": 0.015625,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2021650398770968,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.266250138481458,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/frontier_coverage_5/weight": 0.015625,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031588287480796375,
"train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.02938245516270399,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04563416292270025,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003672806895337999,
"train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003672806895337999,
"train_probe_steps_per_second": 0.03
},
{
"calibration/aurc": 0.30551913172089973,
"calibration/batch_distribution_entropy": 0.8602443760010654,
"calibration/batch_entropy_100bins": 0.736749609443292,
"calibration/batch_entropy_10bins": 0.8602443760010654,
"calibration/batch_entropy_50bins": 0.8128175073562515,
"calibration/batch_uniqueness": 0.9014400949713448,
"calibration/buffer_distribution_entropy": 0.7210148263994508,
"calibration/buffer_entropy_100bins": 0.5682523629363754,
"calibration/buffer_entropy_10bins": 0.7210148263994508,
"calibration/buffer_entropy_50bins": 0.6571883430263827,
"calibration/confidence_entropy": 0.5935431137939096,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.062436579806646866,
"calibration/coverage@20%": 0.27323636460432554,
"calibration/coverage@25%": 0.43223015091863515,
"calibration/coverage@30%": 0.5437253937007874,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.8602443760010654,
"calibration/distribution_entropy_100": 0.736749609443292,
"calibration/ece": 0.1438151896191343,
"calibration/mean_confidence": 0.5757818524322698,
"calibration/unique_confidence_per_question": 0.17395833333333335,
"calibration/unique_confidences": 66.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014496527777777768,
"completions/max_length": 3464.4,
"completions/max_terminated_length": 3464.4,
"completions/mean_length": 735.163037109375,
"completions/mean_terminated_length": 746.039599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.2,
"epoch": 0.13199835002062474,
"grad_norm": 0.0003842144215013832,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0115,
"num_tokens": 110064643.0,
"reward": 1.0145560026168823,
"reward_std": 0.13707308173179628,
"rewards/accuracy_reward": 0.649218738079071,
"rewards/brier_reward": 0.7490468740463256,
"rewards/confidence_uniqueness_reward": 0.8952165722846985,
"rewards/format_reward": 0.9855034708976745,
"rewards/frontier_aurc_reward": -0.001880918419919908,
"rewards/frontier_coverage_0": -0.042861418426036836,
"rewards/frontier_coverage_1": -0.042861418426036836,
"rewards/frontier_coverage_10": -0.042861418426036836,
"rewards/frontier_coverage_15": -0.042861418426036836,
"rewards/frontier_coverage_20": -0.042861418426036836,
"rewards/frontier_coverage_25": -0.042861418426036836,
"rewards/frontier_coverage_5": -0.042861418426036836,
"rewards/true_frontier_ece_gap_only_reward": -0.028965843096375465,
"signal/accuracy_reward/centered_abs_mean": 0.17503797709941865,
"signal/accuracy_reward/group_std_mean": 0.22645011842250823,
"signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08751898854970933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08751898854970933,
"signal/advantage_abs_mean": 0.10272245854139328,
"signal/advantage_pre_scale_abs_mean": 0.10272245854139328,
"signal/advantage_pre_scale_std": 0.16644595563411713,
"signal/advantage_std": 0.16644595563411713,
"signal/brier_reward/centered_abs_mean": 0.1416968137025833,
"signal/brier_reward/group_std_mean": 0.1823040783405304,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017712101712822913,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017712101712822913,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06181478276848793,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08373434096574783,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0077268478460609915,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0077268478460609915,
"signal/format_reward/centered_abs_mean": 0.022303602285683156,
"signal/format_reward/group_std_mean": 0.03928558751940727,
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011151801142841578,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011151801142841578,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001758693833835423,
"signal/frontier_aurc_reward/group_std_mean": 0.0033450972754508258,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7479591153678484e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7479591153678484e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_0/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_1/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_1/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_10/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_10/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_15/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_15/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_20/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_20/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_25/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_25/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_5/centered_abs_mean": 0.16558919548988343,
"signal/frontier_coverage_5/group_std_mean": 0.21207553446292876,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025873311795294287,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025873311795294287,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.026407453045248986,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.038250190764665605,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0033009316306561232,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0033009316306561232,
"step": 55
},
{
"calibration/aurc": 0.3289672986883583,
"calibration/batch_distribution_entropy": 0.8569993905562576,
"calibration/batch_entropy_100bins": 0.7871838425661777,
"calibration/batch_entropy_10bins": 0.8569993905562576,
"calibration/batch_entropy_50bins": 0.8397181934509937,
"calibration/batch_uniqueness": 0.9215534730345482,
"calibration/buffer_distribution_entropy": 0.7594609879037215,
"calibration/buffer_entropy_100bins": 0.6073773228204736,
"calibration/buffer_entropy_10bins": 0.7594609879037215,
"calibration/buffer_entropy_50bins": 0.6953888937595927,
"calibration/confidence_entropy": 0.5982193870283902,
"calibration/coverage@0%": 0.004736842105263158,
"calibration/coverage@1%": 0.004736842105263158,
"calibration/coverage@10%": 0.090082667401488,
"calibration/coverage@15%": 0.20160099200881784,
"calibration/coverage@20%": 0.3591953706255167,
"calibration/coverage@25%": 0.4010801873794434,
"calibration/coverage@30%": 0.47177459355194273,
"calibration/coverage@5%": 0.004736842105263158,
"calibration/distribution_entropy_10": 0.8569993905562576,
"calibration/distribution_entropy_100": 0.7871838425661777,
"calibration/ece": 0.19379714223757522,
"calibration/mean_confidence": 0.5902312308233718,
"calibration/unique_confidence_per_question": 0.2088541666666667,
"calibration/unique_confidences": 80.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01519097222222221,
"completions/max_length": 3634.8,
"completions/max_terminated_length": 3634.8,
"completions/mean_length": 732.9686767578125,
"completions/mean_terminated_length": 744.3640380859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 216.0,
"epoch": 0.14399820002249972,
"grad_norm": 0.0004008902469649911,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0135,
"num_tokens": 121605018.0,
"reward": 1.0072944164276123,
"reward_std": 0.15234946310520173,
"rewards/accuracy_reward": 0.6269965291023254,
"rewards/brier_reward": 0.7545630693435669,
"rewards/confidence_uniqueness_reward": 0.9078129887580871,
"rewards/format_reward": 0.9844618082046509,
"rewards/frontier_aurc_reward": -0.0017762274481356144,
"rewards/frontier_coverage_0": -0.019636033568531275,
"rewards/frontier_coverage_1": -0.019636033568531275,
"rewards/frontier_coverage_10": -0.019636033568531275,
"rewards/frontier_coverage_15": -0.019636033568531275,
"rewards/frontier_coverage_20": -0.019636033568531275,
"rewards/frontier_coverage_25": -0.019636033568531275,
"rewards/frontier_coverage_5": -0.019636033568531275,
"rewards/true_frontier_ece_gap_only_reward": -0.032450299337506296,
"signal/accuracy_reward/centered_abs_mean": 0.19198676347732543,
"signal/accuracy_reward/group_std_mean": 0.24801050424575805,
"signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09599338173866272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09599338173866272,
"signal/advantage_abs_mean": 0.11215932667255402,
"signal/advantage_pre_scale_abs_mean": 0.11215932667255402,
"signal/advantage_pre_scale_std": 0.17976947426795958,
"signal/advantage_std": 0.17976947426795958,
"signal/brier_reward/centered_abs_mean": 0.14585065245628356,
"signal/brier_reward/group_std_mean": 0.18760787844657897,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018231331557035445,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018231331557035445,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06241054162383079,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09066012054681778,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007801317702978849,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007801317702978849,
"signal/format_reward/centered_abs_mean": 0.025927734375,
"signal/format_reward/group_std_mean": 0.05031422972679138,
"signal/format_reward/group_zero_std_frac": 0.7861111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0129638671875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0129638671875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002507622819393873,
"signal/frontier_aurc_reward/group_std_mean": 0.004252730589359999,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.918160655302927e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.918160655302927e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_0/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_1/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_1/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_10/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_10/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_15/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_15/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_20/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_20/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_25/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_25/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_5/centered_abs_mean": 0.15634405016899108,
"signal/frontier_coverage_5/group_std_mean": 0.20499549806118011,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024428757838904857,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024428757838904857,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.029747573658823967,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.04069453105330467,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.003718446707352996,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.003718446707352996,
"step": 60
},
{
"calibration/aurc": 0.27763096415418204,
"calibration/batch_distribution_entropy": 0.8015767599827139,
"calibration/batch_entropy_100bins": 0.7749875634194134,
"calibration/batch_entropy_10bins": 0.8015767599827139,
"calibration/batch_entropy_50bins": 0.8129389718529344,
"calibration/batch_uniqueness": 0.9117152414913241,
"calibration/buffer_distribution_entropy": 0.7814829282050988,
"calibration/buffer_entropy_100bins": 0.6385089459762243,
"calibration/buffer_entropy_10bins": 0.7814829282050988,
"calibration/buffer_entropy_50bins": 0.7230478450681841,
"calibration/confidence_entropy": 0.5883703123093914,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.005774278215223097,
"calibration/coverage@15%": 0.2747384205246596,
"calibration/coverage@20%": 0.5393372634703384,
"calibration/coverage@25%": 0.6022603839441535,
"calibration/coverage@30%": 0.7034666666666667,
"calibration/coverage@5%": 0.0,
"calibration/distribution_entropy_10": 0.8015767599827139,
"calibration/distribution_entropy_100": 0.7749875634194134,
"calibration/ece": 0.13966504962011914,
"calibration/mean_confidence": 0.639796467979542,
"calibration/unique_confidence_per_question": 0.18958333333333333,
"calibration/unique_confidences": 72.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013368055555555581,
"completions/max_length": 3414.8,
"completions/max_terminated_length": 3414.8,
"completions/mean_length": 708.5317016601563,
"completions/mean_terminated_length": 718.0807495117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 209.4,
"epoch": 0.1559980500243747,
"grad_norm": 0.0004595128120854497,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0113,
"num_tokens": 132861351.0,
"reward": 1.0280081152915954,
"reward_std": 0.13622777462005614,
"rewards/accuracy_reward": 0.6611979246139527,
"rewards/brier_reward": 0.7732649683952332,
"rewards/confidence_uniqueness_reward": 0.9014915823936462,
"rewards/format_reward": 0.9864583373069763,
"rewards/frontier_aurc_reward": -0.0018865561811253428,
"rewards/frontier_coverage_0": -0.024941197596490383,
"rewards/frontier_coverage_1": -0.024941197596490383,
"rewards/frontier_coverage_10": -0.024941197596490383,
"rewards/frontier_coverage_15": -0.024941197596490383,
"rewards/frontier_coverage_20": -0.024941197596490383,
"rewards/frontier_coverage_25": -0.024941197596490383,
"rewards/frontier_coverage_5": -0.024941197596490383,
"rewards/true_frontier_ece_gap_only_reward": -0.019257388636469842,
"signal/accuracy_reward/centered_abs_mean": 0.16076931357383728,
"signal/accuracy_reward/group_std_mean": 0.21609613299369812,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08038465678691864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08038465678691864,
"signal/advantage_abs_mean": 0.09798353910446167,
"signal/advantage_pre_scale_abs_mean": 0.09798353910446167,
"signal/advantage_pre_scale_std": 0.1702731281518936,
"signal/advantage_std": 0.1702731281518936,
"signal/brier_reward/centered_abs_mean": 0.11966974288225174,
"signal/brier_reward/group_std_mean": 0.1555977314710617,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014958717860281467,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014958717860281467,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07107813656330109,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09667231291532516,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008884767070412636,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008884767070412636,
"signal/format_reward/centered_abs_mean": 0.02330729179084301,
"signal/format_reward/group_std_mean": 0.043983825296163556,
"signal/format_reward/group_zero_std_frac": 0.8222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011653645895421505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011653645895421505,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014523085206747054,
"signal/frontier_aurc_reward/group_std_mean": 0.0024229245027527213,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2692320635542272e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2692320635542272e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_0/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_1/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_1/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_10/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_10/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_15/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_15/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_20/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_20/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_25/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_25/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_5/centered_abs_mean": 0.10854218900203705,
"signal/frontier_coverage_5/group_std_mean": 0.14894305765628815,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016959717031568289,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016959717031568289,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0190825667232275,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.02821722887456417,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0023853208404034376,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0023853208404034376,
"step": 65
},
{
"calibration/aurc": 0.2942477916112748,
"calibration/batch_distribution_entropy": 0.7283107979539343,
"calibration/batch_entropy_100bins": 0.7784214972740136,
"calibration/batch_entropy_10bins": 0.7283107979539343,
"calibration/batch_entropy_50bins": 0.7914896701898086,
"calibration/batch_uniqueness": 0.9236069702763444,
"calibration/buffer_distribution_entropy": 0.794101813716716,
"calibration/buffer_entropy_100bins": 0.6672103969274696,
"calibration/buffer_entropy_10bins": 0.794101813716716,
"calibration/buffer_entropy_50bins": 0.746071032044594,
"calibration/confidence_entropy": 0.5846905333336511,
"calibration/coverage@0%": 0.0199668754084534,
"calibration/coverage@1%": 0.0199668754084534,
"calibration/coverage@10%": 0.05303774155018569,
"calibration/coverage@15%": 0.07666255160682335,
"calibration/coverage@20%": 0.23386797400915085,
"calibration/coverage@25%": 0.3064000917049651,
"calibration/coverage@30%": 0.49787412509417434,
"calibration/coverage@5%": 0.0330902349885059,
"calibration/distribution_entropy_10": 0.7283107979539343,
"calibration/distribution_entropy_100": 0.7784214972740136,
"calibration/ece": 0.091677989362732,
"calibration/mean_confidence": 0.6715764005231176,
"calibration/unique_confidence_per_question": 0.171875,
"calibration/unique_confidences": 66.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010156250000000023,
"completions/max_length": 3671.2,
"completions/max_terminated_length": 3671.2,
"completions/mean_length": 689.2970703125,
"completions/mean_terminated_length": 696.3696044921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.16799790002624967,
"grad_norm": 0.00040550867561250925,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0087,
"num_tokens": 143880197.0,
"reward": 1.0235817909240723,
"reward_std": 0.13608680069446563,
"rewards/accuracy_reward": 0.6411458253860474,
"rewards/brier_reward": 0.771563458442688,
"rewards/confidence_uniqueness_reward": 0.9243876576423645,
"rewards/format_reward": 0.9894965291023254,
"rewards/frontier_aurc_reward": -0.0020083141047507525,
"rewards/frontier_coverage_0": -0.014133398490957915,
"rewards/frontier_coverage_1": -0.014133398490957915,
"rewards/frontier_coverage_10": -0.014133398490957915,
"rewards/frontier_coverage_15": -0.014133398490957915,
"rewards/frontier_coverage_20": -0.014133398490957915,
"rewards/frontier_coverage_25": -0.014133398490957915,
"rewards/frontier_coverage_5": -0.014133398490957915,
"rewards/true_frontier_ece_gap_only_reward": -0.017248846217989923,
"signal/accuracy_reward/centered_abs_mean": 0.1690972238779068,
"signal/accuracy_reward/group_std_mean": 0.2230025738477707,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0845486119389534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0845486119389534,
"signal/advantage_abs_mean": 0.09818485230207444,
"signal/advantage_pre_scale_abs_mean": 0.09818485230207444,
"signal/advantage_pre_scale_std": 0.16889992356300354,
"signal/advantage_std": 0.16889992356300354,
"signal/brier_reward/centered_abs_mean": 0.11540952920913697,
"signal/brier_reward/group_std_mean": 0.1515179991722107,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014426191151142121,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014426191151142121,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047419081628322604,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07018115222454072,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059273852035403255,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059273852035403255,
"signal/format_reward/centered_abs_mean": 0.01873372420668602,
"signal/format_reward/group_std_mean": 0.03724060095846653,
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00936686210334301,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00936686210334301,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013382966397330164,
"signal/frontier_aurc_reward/group_std_mean": 0.0020241386722773314,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0910884995828382e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0910884995828382e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_0/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_1/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_1/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_10/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_10/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_15/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_15/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_20/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_20/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_25/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_25/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_5/centered_abs_mean": 0.10258018672466278,
"signal/frontier_coverage_5/group_std_mean": 0.14305810928344725,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001602815417572856,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001602815417572856,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.016778473183512686,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.023937665671110154,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.002097309147939086,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.002097309147939086,
"step": 70
},
{
"calibration/aurc": 0.26809441942504275,
"calibration/batch_distribution_entropy": 0.6838393931393899,
"calibration/batch_entropy_100bins": 0.7729783643288141,
"calibration/batch_entropy_10bins": 0.6838393931393899,
"calibration/batch_entropy_50bins": 0.7797124119634354,
"calibration/batch_uniqueness": 0.9257592753961467,
"calibration/buffer_distribution_entropy": 0.8014343512506427,
"calibration/buffer_entropy_100bins": 0.6965847069678281,
"calibration/buffer_entropy_10bins": 0.8014343512506427,
"calibration/buffer_entropy_50bins": 0.7671058988973287,
"calibration/confidence_entropy": 0.5651903684955479,
"calibration/coverage@0%": 0.007869979733363341,
"calibration/coverage@1%": 0.007869979733363341,
"calibration/coverage@10%": 0.007869979733363341,
"calibration/coverage@15%": 0.22780781645114204,
"calibration/coverage@20%": 0.257160283163038,
"calibration/coverage@25%": 0.46021289135147203,
"calibration/coverage@30%": 0.6366343893697362,
"calibration/coverage@5%": 0.007869979733363341,
"calibration/distribution_entropy_10": 0.6838393931393899,
"calibration/distribution_entropy_100": 0.7729783643288141,
"calibration/ece": 0.11109141408517027,
"calibration/mean_confidence": 0.7044527597089597,
"calibration/unique_confidence_per_question": 0.16354166666666664,
"calibration/unique_confidences": 62.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00703125,
"completions/max_length": 3003.8,
"completions/max_terminated_length": 3003.8,
"completions/mean_length": 662.574658203125,
"completions/mean_terminated_length": 667.2730590820313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.8,
"epoch": 0.17999775002812465,
"grad_norm": 0.000408834166591987,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0048,
"num_tokens": 154577953.0,
"reward": 1.052880334854126,
"reward_std": 0.13121603578329086,
"rewards/accuracy_reward": 0.6934027791023254,
"rewards/brier_reward": 0.7911527037620545,
"rewards/confidence_uniqueness_reward": 0.9237256646156311,
"rewards/format_reward": 0.9928819537162781,
"rewards/frontier_aurc_reward": -0.001983778248541057,
"rewards/frontier_coverage_0": -0.02244817279279232,
"rewards/frontier_coverage_1": -0.02244817279279232,
"rewards/frontier_coverage_10": -0.02244817279279232,
"rewards/frontier_coverage_15": -0.02244817279279232,
"rewards/frontier_coverage_20": -0.02244817279279232,
"rewards/frontier_coverage_25": -0.02244817279279232,
"rewards/frontier_coverage_5": -0.02244817279279232,
"rewards/true_frontier_ece_gap_only_reward": -0.01708451323211193,
"signal/accuracy_reward/centered_abs_mean": 0.17001952826976777,
"signal/accuracy_reward/group_std_mean": 0.22074966430664061,
"signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08500976413488388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08500976413488388,
"signal/advantage_abs_mean": 0.09748768210411071,
"signal/advantage_pre_scale_abs_mean": 0.09748768210411071,
"signal/advantage_pre_scale_std": 0.16811644434928893,
"signal/advantage_std": 0.16811644434928893,
"signal/brier_reward/centered_abs_mean": 0.110137939453125,
"signal/brier_reward/group_std_mean": 0.14241448044776917,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013767242431640625,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013767242431640625,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0404249906539917,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05972475409507751,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005053123831748963,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005053123831748963,
"signal/format_reward/centered_abs_mean": 0.012988281436264515,
"signal/format_reward/group_std_mean": 0.02832689881324768,
"signal/format_reward/group_zero_std_frac": 0.8694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0064941407181322575,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0064941407181322575,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014353625476360321,
"signal/frontier_aurc_reward/group_std_mean": 0.002100939303636551,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2427539806813002e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2427539806813002e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_0/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_1/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_1/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_10/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_10/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_15/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_15/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_20/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_20/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_25/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_25/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_5/centered_abs_mean": 0.07951787561178207,
"signal/frontier_coverage_5/group_std_mean": 0.1129288211464882,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012424668064340949,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012424668064340949,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.016805017180740834,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.023270204663276672,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0021006271475926042,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0021006271475926042,
"step": 75
},
{
"calibration/aurc": 0.21834242019243638,
"calibration/batch_distribution_entropy": 0.6654728425240195,
"calibration/batch_entropy_100bins": 0.7709733639402805,
"calibration/batch_entropy_10bins": 0.6654728425240195,
"calibration/batch_entropy_50bins": 0.7749415689063472,
"calibration/batch_uniqueness": 0.9253690248872815,
"calibration/buffer_distribution_entropy": 0.8056100350450295,
"calibration/buffer_entropy_100bins": 0.7215675673965605,
"calibration/buffer_entropy_10bins": 0.8056100350450295,
"calibration/buffer_entropy_50bins": 0.7838614791193015,
"calibration/confidence_entropy": 0.545574600148968,
"calibration/coverage@0%": 0.02220438511574777,
"calibration/coverage@1%": 0.02220438511574777,
"calibration/coverage@10%": 0.23966102127612107,
"calibration/coverage@15%": 0.34606994679725095,
"calibration/coverage@20%": 0.5504157493778901,
"calibration/coverage@25%": 0.6507980412356587,
"calibration/coverage@30%": 0.7648198585964071,
"calibration/coverage@5%": 0.07932105432846319,
"calibration/distribution_entropy_10": 0.6654728425240195,
"calibration/distribution_entropy_100": 0.7709733639402805,
"calibration/ece": 0.12195687052074375,
"calibration/mean_confidence": 0.7164405583247829,
"calibration/unique_confidence_per_question": 0.1703125,
"calibration/unique_confidences": 65.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01137152777777779,
"completions/max_length": 3956.8,
"completions/max_terminated_length": 3956.8,
"completions/mean_length": 682.140283203125,
"completions/mean_terminated_length": 690.0593139648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.0,
"epoch": 0.19199760002999963,
"grad_norm": 0.0004445287340786308,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0085,
"num_tokens": 165489489.0,
"reward": 1.0320314049720765,
"reward_std": 0.1364010527729988,
"rewards/accuracy_reward": 0.6573784589767456,
"rewards/brier_reward": 0.7777002453804016,
"rewards/confidence_uniqueness_reward": 0.919914448261261,
"rewards/format_reward": 0.9884548544883728,
"rewards/frontier_aurc_reward": -0.002187199471518397,
"rewards/frontier_coverage_0": -0.009425394237041473,
"rewards/frontier_coverage_1": -0.009425394237041473,
"rewards/frontier_coverage_10": -0.009425394237041473,
"rewards/frontier_coverage_15": -0.009425394237041473,
"rewards/frontier_coverage_20": -0.009425394237041473,
"rewards/frontier_coverage_25": -0.009425394237041473,
"rewards/frontier_coverage_5": -0.009425394237041473,
"rewards/true_frontier_ece_gap_only_reward": -0.016176528483629226,
"signal/accuracy_reward/centered_abs_mean": 0.16724717915058135,
"signal/accuracy_reward/group_std_mean": 0.22416210770606995,
"signal/accuracy_reward/group_zero_std_frac": 0.35,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08362358957529067,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08362358957529067,
"signal/advantage_abs_mean": 0.09906900972127915,
"signal/advantage_pre_scale_abs_mean": 0.09906900972127915,
"signal/advantage_pre_scale_std": 0.16992701590061188,
"signal/advantage_std": 0.16992701590061188,
"signal/brier_reward/centered_abs_mean": 0.11396953910589218,
"signal/brier_reward/group_std_mean": 0.14948717951774598,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014246192388236522,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014246192388236522,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041234496235847476,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06095789596438408,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0051543120294809345,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0051543120294809345,
"signal/format_reward/centered_abs_mean": 0.01700846329331398,
"signal/format_reward/group_std_mean": 0.03214373588562012,
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00850423164665699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00850423164665699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015931333182379603,
"signal/frontier_aurc_reward/group_std_mean": 0.0023470679763704537,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.489270809746813e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.489270809746813e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_0/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_1/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_1/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_10/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_10/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_15/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_15/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_20/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_20/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_25/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_25/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_5/centered_abs_mean": 0.07731934040784835,
"signal/frontier_coverage_5/group_std_mean": 0.10898190438747406,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012081146938726305,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012081146938726305,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.015519179962575435,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.021731919422745705,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0019398974953219294,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0019398974953219294,
"step": 80
},
{
"calibration/aurc": 0.223200967338523,
"calibration/batch_distribution_entropy": 0.7470308632593083,
"calibration/batch_entropy_100bins": 0.8097957063944881,
"calibration/batch_entropy_10bins": 0.7470308632593083,
"calibration/batch_entropy_50bins": 0.8235600550504187,
"calibration/batch_uniqueness": 0.9389559027110078,
"calibration/buffer_distribution_entropy": 0.810030947507659,
"calibration/buffer_entropy_100bins": 0.7424741081330337,
"calibration/buffer_entropy_10bins": 0.810030947507659,
"calibration/buffer_entropy_50bins": 0.7981797753927561,
"calibration/confidence_entropy": 0.5852915959437024,
"calibration/coverage@0%": 0.009550042580097244,
"calibration/coverage@1%": 0.009550042580097244,
"calibration/coverage@10%": 0.13673516904381933,
"calibration/coverage@15%": 0.25685554970229674,
"calibration/coverage@20%": 0.41418009626638747,
"calibration/coverage@25%": 0.6514727170160993,
"calibration/coverage@30%": 0.8271656731884673,
"calibration/coverage@5%": 0.04056608536084591,
"calibration/distribution_entropy_10": 0.7470308632593083,
"calibration/distribution_entropy_100": 0.8097957063944881,
"calibration/ece": 0.09254755119264485,
"calibration/mean_confidence": 0.6642736722712816,
"calibration/unique_confidence_per_question": 0.1880208333333333,
"calibration/unique_confidences": 72.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008159722222222231,
"completions/max_length": 3769.2,
"completions/max_terminated_length": 3769.2,
"completions/mean_length": 685.8019897460938,
"completions/mean_terminated_length": 691.5063598632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.2039974500318746,
"grad_norm": 0.00041797629091888666,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0068,
"num_tokens": 176477128.0,
"reward": 1.0475164890289306,
"reward_std": 0.1313488855957985,
"rewards/accuracy_reward": 0.6796006917953491,
"rewards/brier_reward": 0.7933130860328674,
"rewards/confidence_uniqueness_reward": 0.9283158540725708,
"rewards/format_reward": 0.9916666746139526,
"rewards/frontier_aurc_reward": -0.0017628843430429696,
"rewards/frontier_coverage_0": -0.01574636101722717,
"rewards/frontier_coverage_1": -0.01574636101722717,
"rewards/frontier_coverage_10": -0.01574636101722717,
"rewards/frontier_coverage_15": -0.01574636101722717,
"rewards/frontier_coverage_20": -0.01574636101722717,
"rewards/frontier_coverage_25": -0.01574636101722717,
"rewards/frontier_coverage_5": -0.01574636101722717,
"rewards/true_frontier_ece_gap_only_reward": -0.01256832219660282,
"signal/accuracy_reward/centered_abs_mean": 0.1685926616191864,
"signal/accuracy_reward/group_std_mean": 0.22146643698215485,
"signal/accuracy_reward/group_zero_std_frac": 0.36666667461395264,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0842963308095932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0842963308095932,
"signal/advantage_abs_mean": 0.096477310359478,
"signal/advantage_pre_scale_abs_mean": 0.096477310359478,
"signal/advantage_pre_scale_std": 0.16670409142971038,
"signal/advantage_std": 0.16670409142971038,
"signal/brier_reward/centered_abs_mean": 0.10519883632659913,
"signal/brier_reward/group_std_mean": 0.13862771689891815,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01314985454082489,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01314985454082489,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037385327741503716,
"signal/confidence_uniqueness_reward/group_std_mean": 0.058187781274318694,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046731659676879644,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046731659676879644,
"signal/format_reward/centered_abs_mean": 0.014822048507630825,
"signal/format_reward/group_std_mean": 0.03110768012702465,
"signal/format_reward/group_zero_std_frac": 0.8583333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007411024253815413,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007411024253815413,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013182483380660415,
"signal/frontier_aurc_reward/group_std_mean": 0.0019752333406358956,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0597630282281898e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0597630282281898e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_0/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_1/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_1/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_10/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_10/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_15/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_15/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_20/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_20/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_25/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_25/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_5/centered_abs_mean": 0.08810206353664399,
"signal/frontier_coverage_5/group_std_mean": 0.1211852788925171,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013765947427600623,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013765947427600623,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.01288688350468874,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.019262754917144777,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0016108604380860926,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0016108604380860926,
"step": 85
},
{
"calibration/aurc": 0.1750125348445183,
"calibration/batch_distribution_entropy": 0.7160344664199889,
"calibration/batch_entropy_100bins": 0.791538051421172,
"calibration/batch_entropy_10bins": 0.7160344664199889,
"calibration/batch_entropy_50bins": 0.803385649917867,
"calibration/batch_uniqueness": 0.9360382132701994,
"calibration/buffer_distribution_entropy": 0.8130252160812972,
"calibration/buffer_entropy_100bins": 0.7592675172132644,
"calibration/buffer_entropy_10bins": 0.8130252160812972,
"calibration/buffer_entropy_50bins": 0.8095419172681041,
"calibration/confidence_entropy": 0.5752702254035462,
"calibration/coverage@0%": 0.021076727388243136,
"calibration/coverage@1%": 0.021076727388243136,
"calibration/coverage@10%": 0.28257062422924945,
"calibration/coverage@15%": 0.46590979360010365,
"calibration/coverage@20%": 0.6646229888828084,
"calibration/coverage@25%": 0.8164021164021165,
"calibration/coverage@30%": 0.8899470899470898,
"calibration/coverage@5%": 0.05624733106278381,
"calibration/distribution_entropy_10": 0.7160344664199889,
"calibration/distribution_entropy_100": 0.791538051421172,
"calibration/ece": 0.07857751360789,
"calibration/mean_confidence": 0.6838434656307941,
"calibration/unique_confidence_per_question": 0.16197916666666667,
"calibration/unique_confidences": 62.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011892361111111116,
"completions/max_length": 2865.4,
"completions/max_terminated_length": 2865.4,
"completions/mean_length": 644.41875,
"completions/mean_terminated_length": 652.1775756835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.6,
"epoch": 0.2159973000337496,
"grad_norm": 0.0004885443486273289,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.01,
"num_tokens": 186969504.0,
"reward": 1.0422258853912354,
"reward_std": 0.1346314489841461,
"rewards/accuracy_reward": 0.6743055701255798,
"rewards/brier_reward": 0.7906572461128235,
"rewards/confidence_uniqueness_reward": 0.9217118740081787,
"rewards/format_reward": 0.9880208373069763,
"rewards/frontier_aurc_reward": -0.001758960704319179,
"rewards/frontier_coverage_0": -0.01478583961725235,
"rewards/frontier_coverage_1": -0.01478583961725235,
"rewards/frontier_coverage_10": -0.01478583961725235,
"rewards/frontier_coverage_15": -0.01478583961725235,
"rewards/frontier_coverage_20": -0.01478583961725235,
"rewards/frontier_coverage_25": -0.01478583961725235,
"rewards/frontier_coverage_5": -0.01478583961725235,
"rewards/true_frontier_ece_gap_only_reward": -0.01070992909371853,
"signal/accuracy_reward/centered_abs_mean": 0.16719835251569748,
"signal/accuracy_reward/group_std_mean": 0.2174463987350464,
"signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08359917625784874,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08359917625784874,
"signal/advantage_abs_mean": 0.0994023248553276,
"signal/advantage_pre_scale_abs_mean": 0.0994023248553276,
"signal/advantage_pre_scale_std": 0.1722485601902008,
"signal/advantage_std": 0.1722485601902008,
"signal/brier_reward/centered_abs_mean": 0.10387470126152039,
"signal/brier_reward/group_std_mean": 0.13654259741306304,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012984337657690049,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012984337657690049,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.043320811539888385,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06586214751005173,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005415101442486048,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005415101442486048,
"signal/format_reward/centered_abs_mean": 0.02032335065305233,
"signal/format_reward/group_std_mean": 0.03864026740193367,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010161675326526166,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010161675326526166,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012993402313441038,
"signal/frontier_aurc_reward/group_std_mean": 0.0018939806381240488,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.030219111475162e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.030219111475162e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_0/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_1/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_1/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_10/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_10/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_15/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_15/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_20/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_20/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_25/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_25/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_5/centered_abs_mean": 0.07952361851930619,
"signal/frontier_coverage_5/group_std_mean": 0.10869008004665374,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012425565393641592,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012425565393641592,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.010778117179870605,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.01537728812545538,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0013472646474838256,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0013472646474838256,
"step": 90
},
{
"calibration/aurc": 0.20718600424081587,
"calibration/batch_distribution_entropy": 0.7145419993415512,
"calibration/batch_entropy_100bins": 0.7940045869247137,
"calibration/batch_entropy_10bins": 0.7145419993415512,
"calibration/batch_entropy_50bins": 0.8040417294748641,
"calibration/batch_uniqueness": 0.9367676324025431,
"calibration/buffer_distribution_entropy": 0.8145125153648216,
"calibration/buffer_entropy_100bins": 0.7725883097693098,
"calibration/buffer_entropy_10bins": 0.8145125153648216,
"calibration/buffer_entropy_50bins": 0.817964486415557,
"calibration/confidence_entropy": 0.5770644535474301,
"calibration/coverage@0%": 0.018359180375690363,
"calibration/coverage@1%": 0.018359180375690363,
"calibration/coverage@10%": 0.3142551794049478,
"calibration/coverage@15%": 0.47292650936860287,
"calibration/coverage@20%": 0.5374353404763397,
"calibration/coverage@25%": 0.6198096590711307,
"calibration/coverage@30%": 0.70205074477485,
"calibration/coverage@5%": 0.15136133782852185,
"calibration/distribution_entropy_10": 0.7145419993415512,
"calibration/distribution_entropy_100": 0.7940045869247137,
"calibration/ece": 0.12296266760467423,
"calibration/mean_confidence": 0.6829029134911659,
"calibration/unique_confidence_per_question": 0.15885416666666669,
"calibration/unique_confidences": 61.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00824652777777779,
"completions/max_length": 3075.4,
"completions/max_terminated_length": 3075.4,
"completions/mean_length": 649.2624145507813,
"completions/mean_terminated_length": 654.6318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.22799715003562457,
"grad_norm": 0.00040514481952413917,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.0068,
"num_tokens": 197540687.0,
"reward": 1.0414008617401123,
"reward_std": 0.12309487164020538,
"rewards/accuracy_reward": 0.6670138955116272,
"rewards/brier_reward": 0.7899128198623657,
"rewards/confidence_uniqueness_reward": 0.926601231098175,
"rewards/format_reward": 0.9917534589767456,
"rewards/frontier_aurc_reward": -0.0018277077469974756,
"rewards/frontier_coverage_0": -0.012428297474980355,
"rewards/frontier_coverage_1": -0.012428297474980355,
"rewards/frontier_coverage_10": -0.012428297474980355,
"rewards/frontier_coverage_15": -0.012428297474980355,
"rewards/frontier_coverage_20": -0.012428297474980355,
"rewards/frontier_coverage_25": -0.012428297474980355,
"rewards/frontier_coverage_5": -0.012428297474980355,
"rewards/true_frontier_ece_gap_only_reward": -0.009273872710764408,
"signal/accuracy_reward/centered_abs_mean": 0.15275607407093048,
"signal/accuracy_reward/group_std_mean": 0.2017911434173584,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07637803703546524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07637803703546524,
"signal/advantage_abs_mean": 0.08974921703338623,
"signal/advantage_pre_scale_abs_mean": 0.08974921703338623,
"signal/advantage_pre_scale_std": 0.16027459800243377,
"signal/advantage_std": 0.16027459800243377,
"signal/brier_reward/centered_abs_mean": 0.09923952370882035,
"signal/brier_reward/group_std_mean": 0.13173434436321257,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012404940463602543,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012404940463602543,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03722200207412243,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05656850188970566,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046527502592653034,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046527502592653034,
"signal/format_reward/centered_abs_mean": 0.014653862826526165,
"signal/format_reward/group_std_mean": 0.0300372663885355,
"signal/format_reward/group_zero_std_frac": 0.8694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007326931413263083,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007326931413263083,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001311829499900341,
"signal/frontier_aurc_reward/group_std_mean": 0.0018943335162475705,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0497335935942828e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0497335935942828e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_0/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_1/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_1/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_10/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_10/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_15/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_15/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_20/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_20/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_25/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_25/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_5/centered_abs_mean": 0.07780194133520127,
"signal/frontier_coverage_5/group_std_mean": 0.10721372663974763,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012156553333625198,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012156553333625198,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.009020444191992282,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.012560129538178444,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0011275555239990353,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0011275555239990353,
"step": 95
},
{
"calibration/aurc": 0.18912161303723035,
"calibration/batch_distribution_entropy": 0.7509673473140573,
"calibration/batch_entropy_100bins": 0.8107610890298,
"calibration/batch_entropy_10bins": 0.7509673473140573,
"calibration/batch_entropy_50bins": 0.8259464172988149,
"calibration/batch_uniqueness": 0.9409531377801335,
"calibration/buffer_distribution_entropy": 0.8153438800401813,
"calibration/buffer_entropy_100bins": 0.7838405798633927,
"calibration/buffer_entropy_10bins": 0.8153438800401813,
"calibration/buffer_entropy_50bins": 0.8251483429084174,
"calibration/confidence_entropy": 0.5673696735876815,
"calibration/coverage@0%": 0.005273188206117388,
"calibration/coverage@1%": 0.005273188206117388,
"calibration/coverage@10%": 0.14509694130614492,
"calibration/coverage@15%": 0.41793198434036133,
"calibration/coverage@20%": 0.6852799572454381,
"calibration/coverage@25%": 0.8459034433939087,
"calibration/coverage@30%": 0.9333465402575349,
"calibration/coverage@5%": 0.005273188206117388,
"calibration/distribution_entropy_10": 0.7509673473140573,
"calibration/distribution_entropy_100": 0.8107610890298,
"calibration/ece": 0.08220890696497,
"calibration/mean_confidence": 0.6813237522023581,
"calibration/unique_confidence_per_question": 0.17552083333333335,
"calibration/unique_confidences": 67.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012934027777777768,
"completions/max_length": 3705.4,
"completions/max_terminated_length": 3705.4,
"completions/mean_length": 671.973193359375,
"completions/mean_terminated_length": 680.76796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.23999700003749952,
"grad_norm": 0.0004094898758921772,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0104,
"num_tokens": 208380890.0,
"reward": 1.048251223564148,
"reward_std": 0.13225021511316298,
"rewards/accuracy_reward": 0.6796875,
"rewards/brier_reward": 0.8040428400039673,
"rewards/confidence_uniqueness_reward": 0.9271102786064148,
"rewards/format_reward": 0.9870659828186035,
"rewards/frontier_aurc_reward": -0.0017124064732342958,
"rewards/frontier_coverage_0": -0.0011624779552221298,
"rewards/frontier_coverage_1": -0.0011624779552221298,
"rewards/frontier_coverage_10": -0.0011624779552221298,
"rewards/frontier_coverage_15": -0.0011624779552221298,
"rewards/frontier_coverage_20": -0.0011624779552221298,
"rewards/frontier_coverage_25": -0.0011624779552221298,
"rewards/frontier_coverage_5": -0.0011624779552221298,
"rewards/true_frontier_ece_gap_only_reward": -0.010926000401377678,
"signal/accuracy_reward/centered_abs_mean": 0.16735026240348816,
"signal/accuracy_reward/group_std_mean": 0.21467447876930237,
"signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08367513120174408,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08367513120174408,
"signal/advantage_abs_mean": 0.09893292784690857,
"signal/advantage_pre_scale_abs_mean": 0.09893292784690857,
"signal/advantage_pre_scale_std": 0.17071661055088044,
"signal/advantage_std": 0.17071661055088044,
"signal/brier_reward/centered_abs_mean": 0.1086144745349884,
"signal/brier_reward/group_std_mean": 0.141890849173069,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01357680931687355,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01357680931687355,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04100620374083519,
"signal/confidence_uniqueness_reward/group_std_mean": 0.060381069034338,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005125775467604399,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005125775467604399,
"signal/format_reward/centered_abs_mean": 0.020198567770421505,
"signal/format_reward/group_std_mean": 0.03544421307742596,
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010099283885210752,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010099283885210752,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001432186597958207,
"signal/frontier_aurc_reward/group_std_mean": 0.002116669714450836,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2377915593096985e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2377915593096985e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_0/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_1/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_1/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_10/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_10/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_15/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_15/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_20/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_20/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_25/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_25/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_5/centered_abs_mean": 0.09051385223865509,
"signal/frontier_coverage_5/group_std_mean": 0.12151473313570023,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014142789412289857,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014142789412289857,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.00986800417304039,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.014217101410031319,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0012335005216300488,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0012335005216300488,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_completions/clipped_ratio": 0.013020833333333334,
"eval_completions/max_length": 2601.6666666666665,
"eval_completions/max_terminated_length": 2601.6666666666665,
"eval_completions/mean_length": 658.6519266764323,
"eval_completions/mean_terminated_length": 667.3694864908854,
"eval_completions/min_length": 49.166666666666664,
"eval_completions/min_terminated_length": 235.33333333333334,
"eval_loss": 0.0,
"eval_num_tokens": 208380890.0,
"eval_reward": 1.0273559093475342,
"eval_reward_std": 0.2644694770375888,
"eval_rewards/accuracy_reward": 0.65625,
"eval_rewards/brier_reward": 0.7910055716832479,
"eval_rewards/confidence_uniqueness_reward": 0.8701359728972117,
"eval_rewards/format_reward": 0.9861111144224802,
"eval_rewards/frontier_aurc_reward": -0.0019449660709748666,
"eval_rewards/frontier_coverage_0": 9.137632635732491e-05,
"eval_rewards/frontier_coverage_1": 9.137632635732491e-05,
"eval_rewards/frontier_coverage_10": 9.137632635732491e-05,
"eval_rewards/frontier_coverage_15": 9.137632635732491e-05,
"eval_rewards/frontier_coverage_20": 9.137632635732491e-05,
"eval_rewards/frontier_coverage_25": 9.137632635732491e-05,
"eval_rewards/frontier_coverage_5": 9.137632635732491e-05,
"eval_rewards/true_frontier_ece_gap_only_reward": -0.011575784999877214,
"eval_runtime": 208.6217,
"eval_samples_per_second": 4.793,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4365234325329463,
"eval_signal/accuracy_reward/group_std_mean": 0.47344937423865,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21826171626647314,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21826171626647314,
"eval_signal/advantage_abs_mean": 0.23294218629598618,
"eval_signal/advantage_pre_scale_abs_mean": 0.23294218629598618,
"eval_signal/advantage_pre_scale_std": 0.26374371101458866,
"eval_signal/advantage_std": 0.26374371101458866,
"eval_signal/brier_reward/centered_abs_mean": 0.17583947877089182,
"eval_signal/brier_reward/group_std_mean": 0.2251211479306221,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021979934846361477,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021979934846361477,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06556083882848422,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09988817572593689,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008195104853560528,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008195104853560528,
"eval_signal/format_reward/centered_abs_mean": 0.026258680348594982,
"eval_signal/format_reward/group_std_mean": 0.0630940409998099,
"eval_signal/format_reward/group_zero_std_frac": 0.6944444676240286,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013129340174297491,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.013129340174297491,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0024230304018904767,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.003932161644722025,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.78598500295387e-05,
"eval_signal/frontier_aurc_reward/weight": 0.015625,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.78598500295387e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_0/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_0/weight": 0.015625,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_1/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_1/weight": 0.015625,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_10/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_10/weight": 0.015625,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_15/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_15/weight": 0.015625,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_20/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_20/weight": 0.015625,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_25/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_25/weight": 0.015625,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.12514647220571837,
"eval_signal/frontier_coverage_5/group_std_mean": 0.1818079153696696,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/frontier_coverage_5/weight": 0.015625,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019554136282143495,
"eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.01118487554291884,
"eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.017368461936712265,
"eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.001398109442864855,
"eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.001398109442864855,
"eval_steps_per_second": 0.029,
"step": 100
},
{
"epoch": 0.23999700003749952,
"step": 100,
"train_probe_completions/clipped_ratio": 0.017187500000000022,
"train_probe_completions/max_length": 2913.6666666666665,
"train_probe_completions/max_terminated_length": 2913.6666666666665,
"train_probe_completions/mean_length": 669.9316202799479,
"train_probe_completions/mean_terminated_length": 681.6310424804688,
"train_probe_completions/min_length": 0.0,
"train_probe_completions/min_terminated_length": 213.83333333333334,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 208380890.0,
"train_probe_reward": 1.0431965788205464,
"train_probe_reward_std": 0.2604084312915802,
"train_probe_rewards/accuracy_reward": 0.6848958233992258,
"train_probe_rewards/brier_reward": 0.8070287605126699,
"train_probe_rewards/confidence_uniqueness_reward": 0.8711295028527578,
"train_probe_rewards/format_reward": 0.9852430621782938,
"train_probe_rewards/frontier_aurc_reward": -0.001599111206208666,
"train_probe_rewards/frontier_coverage_0": -0.00111961656754526,
"train_probe_rewards/frontier_coverage_1": -0.00111961656754526,
"train_probe_rewards/frontier_coverage_10": -0.00111961656754526,
"train_probe_rewards/frontier_coverage_15": -0.00111961656754526,
"train_probe_rewards/frontier_coverage_20": -0.00111961656754526,
"train_probe_rewards/frontier_coverage_25": -0.00111961656754526,
"train_probe_rewards/frontier_coverage_5": -0.00111961656754526,
"train_probe_rewards/true_frontier_ece_gap_only_reward": -0.011961817430953184,
"train_probe_runtime": 210.4634,
"train_probe_samples_per_second": 4.751,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4185655365387599,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4635271529356639,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20928276826937994,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20928276826937994,
"train_probe_signal/advantage_abs_mean": 0.22382708390553793,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.22382708390553793,
"train_probe_signal/advantage_pre_scale_std": 0.2600322514772415,
"train_probe_signal/advantage_std": 0.2600322514772415,
"train_probe_signal/brier_reward/centered_abs_mean": 0.16734372824430466,
"train_probe_signal/brier_reward/group_std_mean": 0.21692882478237152,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020917966030538082,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.020917966030538082,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06453707938392957,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.10520645851890247,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008067134922991196,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008067134922991196,
"train_probe_signal/format_reward/centered_abs_mean": 0.02783203125,
"train_probe_signal/format_reward/group_std_mean": 0.0701802521944046,
"train_probe_signal/format_reward/group_zero_std_frac": 0.6388889104127884,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013916015625,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.013916015625,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002024289375791947,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0032714407813424864,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.162952149674917e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.015625,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.162952149674917e-05,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_0/weight": 0.015625,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_1/weight": 0.015625,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_10/weight": 0.015625,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_15/weight": 0.015625,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_20/weight": 0.015625,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_25/weight": 0.015625,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.12542071690162024,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.18846788754065832,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/frontier_coverage_5/weight": 0.015625,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019596987015878162,
"train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.011922950390726328,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.018872848711907864,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.001490368798840791,
"train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.001490368798840791,
"train_probe_steps_per_second": 0.029
},
{
"calibration/aurc": 0.3267044793348773,
"calibration/batch_distribution_entropy": 0.7514441882328864,
"calibration/batch_entropy_100bins": 0.8095879565010626,
"calibration/batch_entropy_10bins": 0.7514441882328864,
"calibration/batch_entropy_50bins": 0.8265329826402684,
"calibration/batch_uniqueness": 0.9404129011408552,
"calibration/buffer_distribution_entropy": 0.8180824212746325,
"calibration/buffer_entropy_100bins": 0.7960694640163056,
"calibration/buffer_entropy_10bins": 0.8180824212746325,
"calibration/buffer_entropy_50bins": 0.8335788091827643,
"calibration/confidence_entropy": 0.5574560509219397,
"calibration/coverage@0%": 0.00994418656056587,
"calibration/coverage@1%": 0.00994418656056587,
"calibration/coverage@10%": 0.14097866931918657,
"calibration/coverage@15%": 0.1568937886825818,
"calibration/coverage@20%": 0.2546232650242107,
"calibration/coverage@25%": 0.3336462535299578,
"calibration/coverage@30%": 0.389026431209603,
"calibration/coverage@5%": 0.10649591069849691,
"calibration/distribution_entropy_10": 0.7514441882328864,
"calibration/distribution_entropy_100": 0.8095879565010626,
"calibration/ece": 0.14820278446655621,
"calibration/mean_confidence": 0.6889263632869203,
"calibration/unique_confidence_per_question": 0.175,
"calibration/unique_confidences": 67.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012065972222222231,
"completions/max_length": 3604.2,
"completions/max_terminated_length": 3604.2,
"completions/mean_length": 673.2842163085937,
"completions/mean_terminated_length": 681.5394897460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.2519968500393745,
"grad_norm": 0.0004055156314279884,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.0102,
"num_tokens": 219213988.0,
"reward": 1.044841742515564,
"reward_std": 0.12685696184635162,
"rewards/accuracy_reward": 0.6730902671813965,
"rewards/brier_reward": 0.7991919994354248,
"rewards/confidence_uniqueness_reward": 0.9291059017181397,
"rewards/format_reward": 0.9876736044883728,
"rewards/frontier_aurc_reward": -0.001739606261253357,
"rewards/frontier_coverage_0": -0.001969197951257229,
"rewards/frontier_coverage_1": -0.001969197951257229,
"rewards/frontier_coverage_10": -0.001969197951257229,
"rewards/frontier_coverage_15": -0.001969197951257229,
"rewards/frontier_coverage_20": -0.001969197951257229,
"rewards/frontier_coverage_25": -0.001969197951257229,
"rewards/frontier_coverage_5": -0.001969197951257229,
"rewards/true_frontier_ece_gap_only_reward": -0.010678962059319019,
"signal/accuracy_reward/centered_abs_mean": 0.15438368171453476,
"signal/accuracy_reward/group_std_mean": 0.20449974834918977,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07719184085726738,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07719184085726738,
"signal/advantage_abs_mean": 0.09131217449903488,
"signal/advantage_pre_scale_abs_mean": 0.09131217449903488,
"signal/advantage_pre_scale_std": 0.16192201673984527,
"signal/advantage_std": 0.16192201673984527,
"signal/brier_reward/centered_abs_mean": 0.1098570004105568,
"signal/brier_reward/group_std_mean": 0.14484120011329651,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0137321250513196,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0137321250513196,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04084142223000527,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06246491596102714,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005105177778750658,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005105177778750658,
"signal/format_reward/centered_abs_mean": 0.01923828125,
"signal/format_reward/group_std_mean": 0.03651031218469143,
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009619140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009619140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014394932892173528,
"signal/frontier_aurc_reward/group_std_mean": 0.0021645855624228714,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2492082644021137e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2492082644021137e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_0/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_1/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_1/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_10/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_10/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_15/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_15/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_20/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_20/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_25/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_25/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_5/centered_abs_mean": 0.0979076936841011,
"signal/frontier_coverage_5/group_std_mean": 0.13328861594200134,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015298077138140797,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015298077138140797,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.009452897682785988,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.014724508672952653,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0011816122103482484,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0011816122103482484,
"step": 105
},
{
"calibration/aurc": 0.19653911867283705,
"calibration/batch_distribution_entropy": 0.742189294691922,
"calibration/batch_entropy_100bins": 0.8103696123897912,
"calibration/batch_entropy_10bins": 0.742189294691922,
"calibration/batch_entropy_50bins": 0.8205309171620865,
"calibration/batch_uniqueness": 0.9383345760593518,
"calibration/buffer_distribution_entropy": 0.8196634901938914,
"calibration/buffer_entropy_100bins": 0.8046767967346928,
"calibration/buffer_entropy_10bins": 0.8196634901938914,
"calibration/buffer_entropy_50bins": 0.8391647012059913,
"calibration/confidence_entropy": 0.5437583638987209,
"calibration/coverage@0%": 0.017870423903897183,
"calibration/coverage@1%": 0.017870423903897183,
"calibration/coverage@10%": 0.14836196586429348,
"calibration/coverage@15%": 0.2993953758400066,
"calibration/coverage@20%": 0.5307628916308488,
"calibration/coverage@25%": 0.7679343830030556,
"calibration/coverage@30%": 0.9234316719507646,
"calibration/coverage@5%": 0.04352487416567728,
"calibration/distribution_entropy_10": 0.742189294691922,
"calibration/distribution_entropy_100": 0.8103696123897912,
"calibration/ece": 0.09015899744059616,
"calibration/mean_confidence": 0.6941812575865814,
"calibration/unique_confidence_per_question": 0.19114583333333332,
"calibration/unique_confidences": 73.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009982638888888905,
"completions/max_length": 3548.6,
"completions/max_terminated_length": 3548.6,
"completions/mean_length": 683.9628540039063,
"completions/mean_terminated_length": 690.8879516601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.6,
"epoch": 0.2639967000412495,
"grad_norm": 0.0004070218128617853,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0086,
"num_tokens": 230201688.0,
"reward": 1.0639292001724243,
"reward_std": 0.12081557959318161,
"rewards/accuracy_reward": 0.7053819298744202,
"rewards/brier_reward": 0.8154799818992615,
"rewards/confidence_uniqueness_reward": 0.929580807685852,
"rewards/format_reward": 0.989843738079071,
"rewards/frontier_aurc_reward": -0.001527873962186277,
"rewards/frontier_coverage_0": -0.005524499481543899,
"rewards/frontier_coverage_1": -0.005524499481543899,
"rewards/frontier_coverage_10": -0.005524499481543899,
"rewards/frontier_coverage_15": -0.005524499481543899,
"rewards/frontier_coverage_20": -0.005524499481543899,
"rewards/frontier_coverage_25": -0.005524499481543899,
"rewards/frontier_coverage_5": -0.005524499481543899,
"rewards/true_frontier_ece_gap_only_reward": -0.009504916891455651,
"signal/accuracy_reward/centered_abs_mean": 0.14922960102558136,
"signal/accuracy_reward/group_std_mean": 0.20320949256420134,
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07461480051279068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07461480051279068,
"signal/advantage_abs_mean": 0.0867169290781021,
"signal/advantage_pre_scale_abs_mean": 0.0867169290781021,
"signal/advantage_pre_scale_std": 0.158852681517601,
"signal/advantage_std": 0.158852681517601,
"signal/brier_reward/centered_abs_mean": 0.10190331041812897,
"signal/brier_reward/group_std_mean": 0.13416367769241333,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012737913802266122,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012737913802266122,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03820802196860314,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05503500029444695,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004776002746075392,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004776002746075392,
"signal/format_reward/centered_abs_mean": 0.01648763045668602,
"signal/format_reward/group_std_mean": 0.029129663482308388,
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00824381522834301,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00824381522834301,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013106558239087463,
"signal/frontier_aurc_reward/group_std_mean": 0.001980750821530819,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.047899724857416e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.047899724857416e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_0/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_1/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_1/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_10/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_10/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_15/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_15/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_20/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_20/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_25/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_25/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_5/centered_abs_mean": 0.09209920465946198,
"signal/frontier_coverage_5/group_std_mean": 0.12662244141101836,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014390500728040934,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014390500728040934,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.008595239371061325,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.01369424220174551,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0010744049213826656,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0010744049213826656,
"step": 110
},
{
"calibration/aurc": 0.30524022173226106,
"calibration/batch_distribution_entropy": 0.7818711531082017,
"calibration/batch_entropy_100bins": 0.8248857687848549,
"calibration/batch_entropy_10bins": 0.7818711531082017,
"calibration/batch_entropy_50bins": 0.8433798093291898,
"calibration/batch_uniqueness": 0.9433997785726878,
"calibration/buffer_distribution_entropy": 0.8217132367095644,
"calibration/buffer_entropy_100bins": 0.8122731904636383,
"calibration/buffer_entropy_10bins": 0.8217132367095644,
"calibration/buffer_entropy_50bins": 0.8442563490493079,
"calibration/confidence_entropy": 0.5560049283881041,
"calibration/coverage@0%": 0.006447631273640926,
"calibration/coverage@1%": 0.006447631273640926,
"calibration/coverage@10%": 0.006447631273640926,
"calibration/coverage@15%": 0.14715146010246977,
"calibration/coverage@20%": 0.4041453376318914,
"calibration/coverage@25%": 0.5688995513801839,
"calibration/coverage@30%": 0.6564715896999361,
"calibration/coverage@5%": 0.006447631273640926,
"calibration/distribution_entropy_10": 0.7818711531082017,
"calibration/distribution_entropy_100": 0.8248857687848549,
"calibration/ece": 0.15221433813065566,
"calibration/mean_confidence": 0.6707838972409772,
"calibration/unique_confidence_per_question": 0.18958333333333335,
"calibration/unique_confidences": 72.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017708333333333326,
"completions/max_length": 3419.2,
"completions/max_terminated_length": 3419.2,
"completions/mean_length": 679.98916015625,
"completions/mean_terminated_length": 692.3475830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.8,
"epoch": 0.27599655004312446,
"grad_norm": 0.00038507970748469234,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0156,
"num_tokens": 241114363.0,
"reward": 1.0363726139068603,
"reward_std": 0.13486612737178802,
"rewards/accuracy_reward": 0.6633680582046508,
"rewards/brier_reward": 0.793210256099701,
"rewards/confidence_uniqueness_reward": 0.9223326444625854,
"rewards/format_reward": 0.9822916626930237,
"rewards/frontier_aurc_reward": -0.0019153600791469217,
"rewards/frontier_coverage_0": 0.003437680657953024,
"rewards/frontier_coverage_1": 0.003437680657953024,
"rewards/frontier_coverage_10": 0.003437680657953024,
"rewards/frontier_coverage_15": 0.003437680657953024,
"rewards/frontier_coverage_20": 0.003437680657953024,
"rewards/frontier_coverage_25": 0.003437680657953024,
"rewards/frontier_coverage_5": 0.003437680657953024,
"rewards/true_frontier_ece_gap_only_reward": -0.009969686530530453,
"signal/accuracy_reward/centered_abs_mean": 0.16082899123430253,
"signal/accuracy_reward/group_std_mean": 0.2069980025291443,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08041449561715126,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08041449561715126,
"signal/advantage_abs_mean": 0.09912077933549882,
"signal/advantage_pre_scale_abs_mean": 0.09912077933549882,
"signal/advantage_pre_scale_std": 0.1765537291765213,
"signal/advantage_std": 0.1765537291765213,
"signal/brier_reward/centered_abs_mean": 0.11570018827915192,
"signal/brier_reward/group_std_mean": 0.151495760679245,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446252353489399,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01446252353489399,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04762213602662087,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07307658642530442,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059527670033276085,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059527670033276085,
"signal/format_reward/centered_abs_mean": 0.02702907994389534,
"signal/format_reward/group_std_mean": 0.04844924733042717,
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01351453997194767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01351453997194767,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016357111278921365,
"signal/frontier_aurc_reward/group_std_mean": 0.002513893973082304,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5557986373314633e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5557986373314633e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_0/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_1/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_1/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_10/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_10/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_15/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_15/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_20/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_20/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_25/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_25/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_5/centered_abs_mean": 0.09530313909053803,
"signal/frontier_coverage_5/group_std_mean": 0.12956467568874358,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014891115482896567,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014891115482896567,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.00945689920336008,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.01578503046184778,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00118211240042001,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00118211240042001,
"step": 115
},
{
"calibration/aurc": 0.2829972784176247,
"calibration/batch_distribution_entropy": 0.7978632783074413,
"calibration/batch_entropy_100bins": 0.8326413576750176,
"calibration/batch_entropy_10bins": 0.7978632783074413,
"calibration/batch_entropy_50bins": 0.8484370752003694,
"calibration/batch_uniqueness": 0.9428754810521134,
"calibration/buffer_distribution_entropy": 0.8256495412935981,
"calibration/buffer_entropy_100bins": 0.8198335167796709,
"calibration/buffer_entropy_10bins": 0.8256495412935981,
"calibration/buffer_entropy_50bins": 0.8499243478716128,
"calibration/confidence_entropy": 0.5454490383451711,
"calibration/coverage@0%": 0.024511889179755674,
"calibration/coverage@1%": 0.024511889179755674,
"calibration/coverage@10%": 0.18489583333333334,
"calibration/coverage@15%": 0.3580170157068063,
"calibration/coverage@20%": 0.47328206806282724,
"calibration/coverage@25%": 0.5411131108202444,
"calibration/coverage@30%": 0.5953125,
"calibration/coverage@5%": 0.04899105584642234,
"calibration/distribution_entropy_10": 0.7978632783074413,
"calibration/distribution_entropy_100": 0.8326413576750176,
"calibration/ece": 0.14527504044433004,
"calibration/mean_confidence": 0.6648941085090072,
"calibration/unique_confidence_per_question": 0.2046875,
"calibration/unique_confidences": 78.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011024305555555558,
"completions/max_length": 3318.6,
"completions/max_terminated_length": 3318.6,
"completions/mean_length": 686.3271728515625,
"completions/mean_terminated_length": 693.9168334960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.2,
"epoch": 0.28799640004499943,
"grad_norm": 0.0003715228522196412,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.009,
"num_tokens": 252102708.0,
"reward": 1.048642134666443,
"reward_std": 0.12622617483139037,
"rewards/accuracy_reward": 0.6764756917953492,
"rewards/brier_reward": 0.8032851457595825,
"rewards/confidence_uniqueness_reward": 0.9316941499710083,
"rewards/format_reward": 0.9888888835906983,
"rewards/frontier_aurc_reward": -0.001726908260025084,
"rewards/frontier_coverage_0": 0.0026118648587726057,
"rewards/frontier_coverage_1": 0.0026118648587726057,
"rewards/frontier_coverage_10": 0.0026118648587726057,
"rewards/frontier_coverage_15": 0.0026118648587726057,
"rewards/frontier_coverage_20": 0.0026118648587726057,
"rewards/frontier_coverage_25": 0.0026118648587726057,
"rewards/frontier_coverage_5": 0.0026118648587726057,
"rewards/true_frontier_ece_gap_only_reward": -0.009370057098567485,
"signal/accuracy_reward/centered_abs_mean": 0.16347113847732545,
"signal/accuracy_reward/group_std_mean": 0.21460457444190978,
"signal/accuracy_reward/group_zero_std_frac": 0.38888890147209165,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08173556923866272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08173556923866272,
"signal/advantage_abs_mean": 0.09389262199401856,
"signal/advantage_pre_scale_abs_mean": 0.09389262199401856,
"signal/advantage_pre_scale_std": 0.16233381628990173,
"signal/advantage_std": 0.16233381628990173,
"signal/brier_reward/centered_abs_mean": 0.10937037020921707,
"signal/brier_reward/group_std_mean": 0.14254879355430602,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013671296276152134,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013671296276152134,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03843596838414669,
"signal/confidence_uniqueness_reward/group_std_mean": 0.057198996841907504,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004804496048018336,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004804496048018336,
"signal/format_reward/centered_abs_mean": 0.017404513992369176,
"signal/format_reward/group_std_mean": 0.032134901732206345,
"signal/format_reward/group_zero_std_frac": 0.8666666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008702256996184588,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008702256996184588,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014132563257589937,
"signal/frontier_aurc_reward/group_std_mean": 0.0021206842735409736,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2082130089984276e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2082130089984276e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_0/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_1/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_1/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_10/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_10/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_15/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_15/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_20/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_20/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_25/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_25/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_5/centered_abs_mean": 0.10615848153829574,
"signal/frontier_coverage_5/group_std_mean": 0.14436171054840088,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001658726274035871,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001658726274035871,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.008557920716702938,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.014681273698806762,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0010697400895878673,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0010697400895878673,
"step": 120
},
{
"calibration/aurc": 0.19671941465770884,
"calibration/batch_distribution_entropy": 0.714069251213561,
"calibration/batch_entropy_100bins": 0.79093280229107,
"calibration/batch_entropy_10bins": 0.714069251213561,
"calibration/batch_entropy_50bins": 0.7976220273158751,
"calibration/batch_uniqueness": 0.9272707791642439,
"calibration/buffer_distribution_entropy": 0.8283623145695502,
"calibration/buffer_entropy_100bins": 0.8261738795237367,
"calibration/buffer_entropy_10bins": 0.8283623145695502,
"calibration/buffer_entropy_50bins": 0.8545030151086038,
"calibration/confidence_entropy": 0.5104855330872758,
"calibration/coverage@0%": 0.014166491343532641,
"calibration/coverage@1%": 0.014166491343532641,
"calibration/coverage@10%": 0.31885399134353265,
"calibration/coverage@15%": 0.3815390808752131,
"calibration/coverage@20%": 0.4841432475418797,
"calibration/coverage@25%": 0.8064810181594071,
"calibration/coverage@30%": 0.9008741303877912,
"calibration/coverage@5%": 0.014166491343532641,
"calibration/distribution_entropy_10": 0.714069251213561,
"calibration/distribution_entropy_100": 0.79093280229107,
"calibration/ece": 0.09618043935237061,
"calibration/mean_confidence": 0.7132224626554917,
"calibration/unique_confidence_per_question": 0.1828125,
"calibration/unique_confidences": 70.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012239583333333304,
"completions/max_length": 3284.4,
"completions/max_terminated_length": 3284.4,
"completions/mean_length": 691.3783081054687,
"completions/mean_terminated_length": 700.0879638671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 221.8,
"epoch": 0.2999962500468744,
"grad_norm": 0.00044733521644957364,
"learning_rate": 2.5e-06,
"loss": -0.0113,
"num_tokens": 263185050.0,
"reward": 1.055611777305603,
"reward_std": 0.1275094196200371,
"rewards/accuracy_reward": 0.6895833253860474,
"rewards/brier_reward": 0.8160496830940247,
"rewards/confidence_uniqueness_reward": 0.92153559923172,
"rewards/format_reward": 0.9876736164093017,
"rewards/frontier_aurc_reward": -0.0017230862518772482,
"rewards/frontier_coverage_0": 0.01225762339308858,
"rewards/frontier_coverage_1": 0.01225762339308858,
"rewards/frontier_coverage_10": 0.01225762339308858,
"rewards/frontier_coverage_15": 0.01225762339308858,
"rewards/frontier_coverage_20": 0.01225762339308858,
"rewards/frontier_coverage_25": 0.01225762339308858,
"rewards/frontier_coverage_5": 0.01225762339308858,
"rewards/true_frontier_ece_gap_only_reward": -0.012229060940444469,
"signal/accuracy_reward/centered_abs_mean": 0.16138237714767456,
"signal/accuracy_reward/group_std_mean": 0.21225160956382752,
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08069118857383728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08069118857383728,
"signal/advantage_abs_mean": 0.0945181205868721,
"signal/advantage_pre_scale_abs_mean": 0.0945181205868721,
"signal/advantage_pre_scale_std": 0.1663988560438156,
"signal/advantage_std": 0.1663988560438156,
"signal/brier_reward/centered_abs_mean": 0.11373110711574555,
"signal/brier_reward/group_std_mean": 0.1483635872602463,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014216388389468194,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014216388389468194,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04427947551012039,
"signal/confidence_uniqueness_reward/group_std_mean": 0.062184395641088484,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005534934438765049,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005534934438765049,
"signal/format_reward/centered_abs_mean": 0.018576388712972403,
"signal/format_reward/group_std_mean": 0.031818334758281705,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009288194356486201,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009288194356486201,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016881852876394986,
"signal/frontier_aurc_reward/group_std_mean": 0.0025606358423829077,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6377895119367166e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6377895119367166e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_0/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_1/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_1/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_10/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_10/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_15/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_15/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_20/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_20/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_25/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_25/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_5/centered_abs_mean": 0.0979716956615448,
"signal/frontier_coverage_5/group_std_mean": 0.1332421526312828,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015308077447116375,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015308077447116375,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.01217461358755827,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.02020731884986162,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0015218266984447838,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0015218266984447838,
"step": 125
},
{
"calibration/aurc": 0.26231922913161954,
"calibration/batch_distribution_entropy": 0.7849330143793635,
"calibration/batch_entropy_100bins": 0.8313458268638213,
"calibration/batch_entropy_10bins": 0.7849330143793635,
"calibration/batch_entropy_50bins": 0.8469249993347873,
"calibration/batch_uniqueness": 0.9375108076362693,
"calibration/buffer_distribution_entropy": 0.8294780083702189,
"calibration/buffer_entropy_100bins": 0.8315937057293505,
"calibration/buffer_entropy_10bins": 0.8294780083702189,
"calibration/buffer_entropy_50bins": 0.8582865661934059,
"calibration/confidence_entropy": 0.5124842329428232,
"calibration/coverage@0%": 0.012623205773998373,
"calibration/coverage@1%": 0.012623205773998373,
"calibration/coverage@10%": 0.1614250792927287,
"calibration/coverage@15%": 0.2572956123196314,
"calibration/coverage@20%": 0.3132771916457143,
"calibration/coverage@25%": 0.43678116869105654,
"calibration/coverage@30%": 0.8075378762952686,
"calibration/coverage@5%": 0.022596959054838266,
"calibration/distribution_entropy_10": 0.7849330143793635,
"calibration/distribution_entropy_100": 0.8313458268638213,
"calibration/ece": 0.12743665254843256,
"calibration/mean_confidence": 0.6790373502611695,
"calibration/unique_confidence_per_question": 0.21770833333333334,
"calibration/unique_confidences": 83.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017795138888888885,
"completions/max_length": 3700.6,
"completions/max_terminated_length": 3700.6,
"completions/mean_length": 712.4034790039062,
"completions/mean_terminated_length": 725.374267578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.2,
"epoch": 0.3119961000487494,
"grad_norm": 0.00043796157115139067,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0143,
"num_tokens": 274516738.0,
"reward": 1.0368208050727845,
"reward_std": 0.13528352975845337,
"rewards/accuracy_reward": 0.6607638716697692,
"rewards/brier_reward": 0.8013606786727905,
"rewards/confidence_uniqueness_reward": 0.9176180601119995,
"rewards/format_reward": 0.9821180462837219,
"rewards/frontier_aurc_reward": -0.001984483632259071,
"rewards/frontier_coverage_0": 0.01965160174295306,
"rewards/frontier_coverage_1": 0.01965160174295306,
"rewards/frontier_coverage_10": 0.01965160174295306,
"rewards/frontier_coverage_15": 0.01965160174295306,
"rewards/frontier_coverage_20": 0.01965160174295306,
"rewards/frontier_coverage_25": 0.01965160174295306,
"rewards/frontier_coverage_5": 0.01965160174295306,
"rewards/true_frontier_ece_gap_only_reward": -0.012887386418879033,
"signal/accuracy_reward/centered_abs_mean": 0.1666666716337204,
"signal/accuracy_reward/group_std_mean": 0.21933417916297912,
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0833333358168602,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0833333358168602,
"signal/advantage_abs_mean": 0.09987292736768723,
"signal/advantage_pre_scale_abs_mean": 0.09987292736768723,
"signal/advantage_pre_scale_std": 0.17289304733276367,
"signal/advantage_std": 0.17289304733276367,
"signal/brier_reward/centered_abs_mean": 0.11932021975517274,
"signal/brier_reward/group_std_mean": 0.15680161118507385,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014915027469396592,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014915027469396592,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.049394051730632785,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07107506543397904,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006174256466329098,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006174256466329098,
"signal/format_reward/centered_abs_mean": 0.02468533031642437,
"signal/format_reward/group_std_mean": 0.04145882315933704,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012342665158212186,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012342665158212186,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018592241685837507,
"signal/frontier_aurc_reward/group_std_mean": 0.002899319725111127,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9050377634121105e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9050377634121105e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_0/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_1/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_1/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_10/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_10/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_15/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_15/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_20/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_20/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_25/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_25/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_5/centered_abs_mean": 0.10627900362014771,
"signal/frontier_coverage_5/group_std_mean": 0.14542074501514435,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001660609431564808,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001660609431564808,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.013442159257829189,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.02378322519361973,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0016802699072286486,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0016802699072286486,
"step": 130
},
{
"calibration/aurc": 0.2100724966240294,
"calibration/batch_distribution_entropy": 0.7932919126000467,
"calibration/batch_entropy_100bins": 0.8307378530163175,
"calibration/batch_entropy_10bins": 0.7932919126000467,
"calibration/batch_entropy_50bins": 0.847089723931882,
"calibration/batch_uniqueness": 0.9421351893583964,
"calibration/buffer_distribution_entropy": 0.8324469338084997,
"calibration/buffer_entropy_100bins": 0.8370055455965371,
"calibration/buffer_entropy_10bins": 0.8324469338084997,
"calibration/buffer_entropy_50bins": 0.862450153507045,
"calibration/confidence_entropy": 0.5360752773461647,
"calibration/coverage@0%": 0.019922239322602862,
"calibration/coverage@1%": 0.019922239322602862,
"calibration/coverage@10%": 0.3430041437433335,
"calibration/coverage@15%": 0.35355823345309606,
"calibration/coverage@20%": 0.4321721670970639,
"calibration/coverage@25%": 0.6357222721233556,
"calibration/coverage@30%": 0.7409831478414641,
"calibration/coverage@5%": 0.30943497381008883,
"calibration/distribution_entropy_10": 0.7932919126000467,
"calibration/distribution_entropy_100": 0.8307378530163175,
"calibration/ece": 0.13974870453897614,
"calibration/mean_confidence": 0.6561501450334012,
"calibration/unique_confidence_per_question": 0.19531250000000003,
"calibration/unique_confidences": 75.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0109375,
"completions/max_length": 3228.4,
"completions/max_terminated_length": 3228.4,
"completions/mean_length": 692.0186767578125,
"completions/mean_terminated_length": 699.7394653320313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.8,
"epoch": 0.32399595005062437,
"grad_norm": 0.0004282255540601909,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0093,
"num_tokens": 285581817.0,
"reward": 1.054863166809082,
"reward_std": 0.12371634542942048,
"rewards/accuracy_reward": 0.6837673544883728,
"rewards/brier_reward": 0.8118860840797424,
"rewards/confidence_uniqueness_reward": 0.9371923685073853,
"rewards/format_reward": 0.9890625,
"rewards/frontier_aurc_reward": -0.0013375790789723397,
"rewards/frontier_coverage_0": 0.006732956040650606,
"rewards/frontier_coverage_1": 0.006732956040650606,
"rewards/frontier_coverage_10": 0.006732956040650606,
"rewards/frontier_coverage_15": 0.006732956040650606,
"rewards/frontier_coverage_20": 0.006732956040650606,
"rewards/frontier_coverage_25": 0.006732956040650606,
"rewards/frontier_coverage_5": 0.006732956040650606,
"rewards/true_frontier_ece_gap_only_reward": -0.007217477634549141,
"signal/accuracy_reward/centered_abs_mean": 0.15847981721162796,
"signal/accuracy_reward/group_std_mean": 0.21153208017349243,
"signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07923990860581398,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07923990860581398,
"signal/advantage_abs_mean": 0.08863486796617508,
"signal/advantage_pre_scale_abs_mean": 0.08863486796617508,
"signal/advantage_pre_scale_std": 0.15808248221874238,
"signal/advantage_std": 0.15808248221874238,
"signal/brier_reward/centered_abs_mean": 0.11114487051963806,
"signal/brier_reward/group_std_mean": 0.14610919654369353,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013893108814954758,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013893108814954758,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03625557161867619,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05811716765165329,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004531946452334523,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004531946452334523,
"signal/format_reward/centered_abs_mean": 0.01872829869389534,
"signal/format_reward/group_std_mean": 0.036873598024249075,
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00936414934694767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00936414934694767,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011374737368896604,
"signal/frontier_aurc_reward/group_std_mean": 0.0018357637338340283,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7773027138900943e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7773027138900943e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_0/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_1/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_1/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_10/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_10/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_15/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_15/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_20/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_20/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_25/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_25/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_5/centered_abs_mean": 0.12717922925949096,
"signal/frontier_coverage_5/group_std_mean": 0.17108558714389802,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019871754571795463,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019871754571795463,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.006792500615119934,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.012135511264204979,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0008490625768899918,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0008490625768899918,
"step": 135
},
{
"calibration/aurc": 0.12316983748625196,
"calibration/batch_distribution_entropy": 0.8033321196483671,
"calibration/batch_entropy_100bins": 0.840970506147876,
"calibration/batch_entropy_10bins": 0.8033321196483671,
"calibration/batch_entropy_50bins": 0.8595641216344057,
"calibration/batch_uniqueness": 0.9441387779910293,
"calibration/buffer_distribution_entropy": 0.8356524945448074,
"calibration/buffer_entropy_100bins": 0.844583354188865,
"calibration/buffer_entropy_10bins": 0.8356524945448074,
"calibration/buffer_entropy_50bins": 0.8676924896189366,
"calibration/confidence_entropy": 0.5414429455483087,
"calibration/coverage@0%": 0.024367036493294764,
"calibration/coverage@1%": 0.024367036493294764,
"calibration/coverage@10%": 0.5015683248215262,
"calibration/coverage@15%": 0.7200879000764464,
"calibration/coverage@20%": 0.806558927378318,
"calibration/coverage@25%": 0.887908015749872,
"calibration/coverage@30%": 0.9627324607905436,
"calibration/coverage@5%": 0.21789834997139393,
"calibration/distribution_entropy_10": 0.8033321196483671,
"calibration/distribution_entropy_100": 0.840970506147876,
"calibration/ece": 0.12557287187845612,
"calibration/mean_confidence": 0.6414325364057707,
"calibration/unique_confidence_per_question": 0.20520833333333335,
"calibration/unique_confidences": 78.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012847222222222232,
"completions/max_length": 3215.0,
"completions/max_terminated_length": 3215.0,
"completions/mean_length": 675.0425415039062,
"completions/mean_terminated_length": 683.766796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.2,
"epoch": 0.33599580005249935,
"grad_norm": 0.0003816323878709227,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0113,
"num_tokens": 296462531.0,
"reward": 1.0525088548660277,
"reward_std": 0.12087329030036927,
"rewards/accuracy_reward": 0.681163203716278,
"rewards/brier_reward": 0.8102995872497558,
"rewards/confidence_uniqueness_reward": 0.9345194458961487,
"rewards/format_reward": 0.9871527791023255,
"rewards/frontier_aurc_reward": -0.0013744331081397831,
"rewards/frontier_coverage_0": 0.00963379731401801,
"rewards/frontier_coverage_1": 0.00963379731401801,
"rewards/frontier_coverage_10": 0.00963379731401801,
"rewards/frontier_coverage_15": 0.00963379731401801,
"rewards/frontier_coverage_20": 0.00963379731401801,
"rewards/frontier_coverage_25": 0.00963379731401801,
"rewards/frontier_coverage_5": 0.00963379731401801,
"rewards/true_frontier_ece_gap_only_reward": -0.006269952561706304,
"signal/accuracy_reward/centered_abs_mean": 0.14478624165058135,
"signal/accuracy_reward/group_std_mean": 0.19590498208999635,
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07239312082529067,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07239312082529067,
"signal/advantage_abs_mean": 0.08584622740745544,
"signal/advantage_pre_scale_abs_mean": 0.08584622740745544,
"signal/advantage_pre_scale_std": 0.15582461655139923,
"signal/advantage_std": 0.15582461655139923,
"signal/brier_reward/centered_abs_mean": 0.11108436435461044,
"signal/brier_reward/group_std_mean": 0.14652538895606995,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013885545544326305,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013885545544326305,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039488519355654714,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06021577715873718,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004936064919456839,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004936064919456839,
"signal/format_reward/centered_abs_mean": 0.020616319216787814,
"signal/format_reward/group_std_mean": 0.037392809987068176,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010308159608393907,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010308159608393907,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012093130266293884,
"signal/frontier_aurc_reward/group_std_mean": 0.0018805687082931398,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8895516041084194e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8895516041084194e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_0/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_1/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_1/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_10/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_10/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_15/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_15/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_20/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_20/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_25/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_25/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_5/centered_abs_mean": 0.12452564984560013,
"signal/frontier_coverage_5/group_std_mean": 0.17022224068641661,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001945713278837502,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001945713278837502,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0054856881499290465,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.00944354822859168,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0006857110187411308,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0006857110187411308,
"step": 140
},
{
"calibration/aurc": 0.1707190282167193,
"calibration/batch_distribution_entropy": 0.7870297352600305,
"calibration/batch_entropy_100bins": 0.8287951367704729,
"calibration/batch_entropy_10bins": 0.7870297352600305,
"calibration/batch_entropy_50bins": 0.8444396351033697,
"calibration/batch_uniqueness": 0.9381528864738238,
"calibration/buffer_distribution_entropy": 0.836454052730403,
"calibration/buffer_entropy_100bins": 0.8535101399161015,
"calibration/buffer_entropy_10bins": 0.836454052730403,
"calibration/buffer_entropy_50bins": 0.8728050714670547,
"calibration/confidence_entropy": 0.5240414918619417,
"calibration/coverage@0%": 0.026351947388887843,
"calibration/coverage@1%": 0.026351947388887843,
"calibration/coverage@10%": 0.41923410430351626,
"calibration/coverage@15%": 0.5314265164218459,
"calibration/coverage@20%": 0.620989570820157,
"calibration/coverage@25%": 0.751987830680877,
"calibration/coverage@30%": 0.8425736230097444,
"calibration/coverage@5%": 0.19100326031606285,
"calibration/distribution_entropy_10": 0.7870297352600305,
"calibration/distribution_entropy_100": 0.8287951367704729,
"calibration/ece": 0.1261507501639957,
"calibration/mean_confidence": 0.6589088571731689,
"calibration/unique_confidence_per_question": 0.20416666666666666,
"calibration/unique_confidences": 78.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01015625,
"completions/max_length": 3217.4,
"completions/max_terminated_length": 3217.4,
"completions/mean_length": 651.4529541015625,
"completions/mean_terminated_length": 658.1746948242187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.8,
"epoch": 0.34799565005437433,
"grad_norm": 0.0004082749364897609,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0092,
"num_tokens": 307031877.0,
"reward": 1.0682907581329346,
"reward_std": 0.11585188210010529,
"rewards/accuracy_reward": 0.7100694179534912,
"rewards/brier_reward": 0.8259410500526428,
"rewards/confidence_uniqueness_reward": 0.9193386673927307,
"rewards/format_reward": 0.9896701335906982,
"rewards/frontier_aurc_reward": -0.0016615271219052375,
"rewards/frontier_coverage_0": 0.010000471444800495,
"rewards/frontier_coverage_1": 0.010000471444800495,
"rewards/frontier_coverage_10": 0.010000471444800495,
"rewards/frontier_coverage_15": 0.010000471444800495,
"rewards/frontier_coverage_20": 0.010000471444800495,
"rewards/frontier_coverage_25": 0.01015151059255004,
"rewards/frontier_coverage_5": 0.010000471444800495,
"rewards/true_frontier_ece_gap_only_reward": -0.006473575532436371,
"signal/accuracy_reward/centered_abs_mean": 0.1404513895511627,
"signal/accuracy_reward/group_std_mean": 0.19066681563854218,
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07022569477558135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07022569477558135,
"signal/advantage_abs_mean": 0.08215740621089936,
"signal/advantage_pre_scale_abs_mean": 0.08215740621089936,
"signal/advantage_pre_scale_std": 0.15350556373596191,
"signal/advantage_std": 0.15350556373596191,
"signal/brier_reward/centered_abs_mean": 0.10650975555181504,
"signal/brier_reward/group_std_mean": 0.13953691720962524,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01331371944397688,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01331371944397688,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04581173062324524,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06800653263926507,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005726466327905655,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005726466327905655,
"signal/format_reward/centered_abs_mean": 0.01798502616584301,
"signal/format_reward/group_std_mean": 0.03467189371585846,
"signal/format_reward/group_zero_std_frac": 0.8555555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008992513082921505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008992513082921505,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017546760383993388,
"signal/frontier_aurc_reward/group_std_mean": 0.0026522258296608923,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7416813099989668e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7416813099989668e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10596181005239487,
"signal/frontier_coverage_0/group_std_mean": 0.1449078232049942,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_1/centered_abs_mean": 0.10596181005239487,
"signal/frontier_coverage_1/group_std_mean": 0.1449078232049942,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_10/centered_abs_mean": 0.10596181005239487,
"signal/frontier_coverage_10/group_std_mean": 0.1449078232049942,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_15/centered_abs_mean": 0.10596181005239487,
"signal/frontier_coverage_15/group_std_mean": 0.1449078232049942,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_20/centered_abs_mean": 0.10596181005239487,
"signal/frontier_coverage_20/group_std_mean": 0.1449078232049942,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_25/centered_abs_mean": 0.1005162313580513,
"signal/frontier_coverage_25/group_std_mean": 0.13775036633014678,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015705661149695515,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015705661149695515,
"signal/frontier_coverage_5/centered_abs_mean": 0.10596181005239487,
"signal/frontier_coverage_5/group_std_mean": 0.1449078232049942,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016556532820686698,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016556532820686698,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.005957813002169132,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.011087938956916333,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0007447266252711415,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0007447266252711415,
"step": 145
},
{
"calibration/aurc": 0.18209448578532905,
"calibration/batch_distribution_entropy": 0.7402098532235222,
"calibration/batch_entropy_100bins": 0.8083018729096372,
"calibration/batch_entropy_10bins": 0.7402098532235222,
"calibration/batch_entropy_50bins": 0.8191539259469073,
"calibration/batch_uniqueness": 0.9301449071354948,
"calibration/buffer_distribution_entropy": 0.8311244629730232,
"calibration/buffer_entropy_100bins": 0.8587688993364191,
"calibration/buffer_entropy_10bins": 0.8311244629730232,
"calibration/buffer_entropy_50bins": 0.8739160708252068,
"calibration/confidence_entropy": 0.5100700509903451,
"calibration/coverage@0%": 0.004201001640614507,
"calibration/coverage@1%": 0.004201001640614507,
"calibration/coverage@10%": 0.34029064401297937,
"calibration/coverage@15%": 0.47469158826861174,
"calibration/coverage@20%": 0.5657170493820617,
"calibration/coverage@25%": 0.7275975167531156,
"calibration/coverage@30%": 0.8418513621822916,
"calibration/coverage@5%": 0.19152198006106938,
"calibration/distribution_entropy_10": 0.7402098532235222,
"calibration/distribution_entropy_100": 0.8083018729096372,
"calibration/ece": 0.12855990872542672,
"calibration/mean_confidence": 0.6749319072008179,
"calibration/unique_confidence_per_question": 0.19739583333333333,
"calibration/unique_confidences": 75.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009635416666666674,
"completions/max_length": 3069.4,
"completions/max_terminated_length": 3069.4,
"completions/mean_length": 702.6961791992187,
"completions/mean_terminated_length": 709.4884643554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.8,
"epoch": 0.3599955000562493,
"grad_norm": 0.0004130478191655129,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.0071,
"num_tokens": 318237273.0,
"reward": 1.0614855289459229,
"reward_std": 0.1201270878314972,
"rewards/accuracy_reward": 0.6957465291023255,
"rewards/brier_reward": 0.8208768486976623,
"rewards/confidence_uniqueness_reward": 0.9183289051055908,
"rewards/format_reward": 0.9903645873069763,
"rewards/frontier_aurc_reward": -0.0020163535373285413,
"rewards/frontier_coverage_0": 0.013487431593239308,
"rewards/frontier_coverage_1": 0.013487431593239308,
"rewards/frontier_coverage_10": 0.013487431593239308,
"rewards/frontier_coverage_15": 0.013487431593239308,
"rewards/frontier_coverage_20": 0.012901889439672232,
"rewards/frontier_coverage_25": 0.020031385496258734,
"rewards/frontier_coverage_5": 0.013487431593239308,
"rewards/true_frontier_ece_gap_only_reward": -0.0040603259578347204,
"signal/accuracy_reward/centered_abs_mean": 0.15117729753255843,
"signal/accuracy_reward/group_std_mean": 0.20031063556671141,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07558864876627922,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07558864876627922,
"signal/advantage_abs_mean": 0.08776713460683823,
"signal/advantage_pre_scale_abs_mean": 0.08776713460683823,
"signal/advantage_pre_scale_std": 0.1591554254293442,
"signal/advantage_std": 0.1591554254293442,
"signal/brier_reward/centered_abs_mean": 0.10871631652116776,
"signal/brier_reward/group_std_mean": 0.14245359599590302,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01358953956514597,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01358953956514597,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042987743765115737,
"signal/confidence_uniqueness_reward/group_std_mean": 0.062032976746559144,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005373467970639467,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005373467970639467,
"signal/format_reward/centered_abs_mean": 0.015554470103234052,
"signal/format_reward/group_std_mean": 0.029216957837343217,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007777235051617026,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007777235051617026,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022927817422896623,
"signal/frontier_aurc_reward/group_std_mean": 0.003552949335426092,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.582471472327597e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.582471472327597e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.09796600192785263,
"signal/frontier_coverage_0/group_std_mean": 0.13663864582777024,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_1/centered_abs_mean": 0.09796600192785263,
"signal/frontier_coverage_1/group_std_mean": 0.13663864582777024,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_10/centered_abs_mean": 0.09796600192785263,
"signal/frontier_coverage_10/group_std_mean": 0.13663864582777024,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_15/centered_abs_mean": 0.09796600192785263,
"signal/frontier_coverage_15/group_std_mean": 0.13663864582777024,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_20/centered_abs_mean": 0.09587855786085128,
"signal/frontier_coverage_20/group_std_mean": 0.13392478972673416,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014981024665758013,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014981024665758013,
"signal/frontier_coverage_25/centered_abs_mean": 0.036509061604738234,
"signal/frontier_coverage_25/group_std_mean": 0.0526451326906681,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005704540875740349,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005704540875740349,
"signal/frontier_coverage_5/centered_abs_mean": 0.09796600192785263,
"signal/frontier_coverage_5/group_std_mean": 0.13663864582777024,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015307187801226973,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015307187801226973,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0036625199019908903,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.006539558339864015,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.002777777798473835,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004578149877488613,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004578149877488613,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_completions/clipped_ratio": 0.011284722222222229,
"eval_completions/max_length": 2426.8333333333335,
"eval_completions/max_terminated_length": 2426.8333333333335,
"eval_completions/mean_length": 678.6144205729166,
"eval_completions/mean_terminated_length": 686.3630065917969,
"eval_completions/min_length": 51.5,
"eval_completions/min_terminated_length": 242.16666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 318237273.0,
"eval_reward": 1.037120411793391,
"eval_reward_std": 0.26037078599135083,
"eval_rewards/accuracy_reward": 0.6710069278875986,
"eval_rewards/brier_reward": 0.7987116674582163,
"eval_rewards/confidence_uniqueness_reward": 0.864409069220225,
"eval_rewards/format_reward": 0.9861111144224802,
"eval_rewards/frontier_aurc_reward": -0.0020773761401263378,
"eval_rewards/frontier_coverage_0": 0.008917404959599176,
"eval_rewards/frontier_coverage_1": 0.008917404959599176,
"eval_rewards/frontier_coverage_10": 0.008917404959599176,
"eval_rewards/frontier_coverage_15": 0.008917404959599176,
"eval_rewards/frontier_coverage_20": 0.00835825433023274,
"eval_rewards/frontier_coverage_25": 0.019696833721051615,
"eval_rewards/frontier_coverage_5": 0.008917404959599176,
"eval_rewards/true_frontier_ece_gap_only_reward": -0.003450475827169915,
"eval_runtime": 210.1124,
"eval_samples_per_second": 4.759,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4268120676279068,
"eval_signal/accuracy_reward/group_std_mean": 0.46841634809970856,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2134060338139534,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2134060338139534,
"eval_signal/advantage_abs_mean": 0.22498015811045965,
"eval_signal/advantage_pre_scale_abs_mean": 0.22498015811045965,
"eval_signal/advantage_pre_scale_std": 0.2599627524614334,
"eval_signal/advantage_std": 0.2599627524614334,
"eval_signal/brier_reward/centered_abs_mean": 0.19082651287317276,
"eval_signal/brier_reward/group_std_mean": 0.23933010548353195,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023853314109146595,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.023853314109146595,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07184251459936301,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10839165622989337,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008980314324920377,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008980314324920377,
"eval_signal/format_reward/centered_abs_mean": 0.026475694806625445,
"eval_signal/format_reward/group_std_mean": 0.06660978465030591,
"eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013237847403312722,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.013237847403312722,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033070850962152085,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006110090451935927,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.167320462836263e-05,
"eval_signal/frontier_aurc_reward/weight": 0.015625,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.167320462836263e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.1586559092005094,
"eval_signal/frontier_coverage_0/group_std_mean": 0.24918479472398758,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_0/weight": 0.015625,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1586559092005094,
"eval_signal/frontier_coverage_1/group_std_mean": 0.24918479472398758,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_1/weight": 0.015625,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.1586559092005094,
"eval_signal/frontier_coverage_10/group_std_mean": 0.24918479472398758,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_10/weight": 0.015625,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.1586559092005094,
"eval_signal/frontier_coverage_15/group_std_mean": 0.24918479472398758,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_15/weight": 0.015625,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12169866388042767,
"eval_signal/frontier_coverage_20/group_std_mean": 0.19936797271172205,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019015416231316824,
"eval_signal/frontier_coverage_20/weight": 0.015625,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019015416231316824,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.04645684982339541,
"eval_signal/frontier_coverage_25/group_std_mean": 0.07699030389388402,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007258882784905533,
"eval_signal/frontier_coverage_25/weight": 0.015625,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007258882784905533,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1586559092005094,
"eval_signal/frontier_coverage_5/group_std_mean": 0.24918479472398758,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/frontier_coverage_5/weight": 0.015625,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024789985812579594,
"eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0035569225437939167,
"eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.005802453495562077,
"eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004446153179742396,
"eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004446153179742396,
"eval_steps_per_second": 0.029,
"step": 150
},
{
"epoch": 0.3599955000562493,
"step": 150,
"train_probe_completions/clipped_ratio": 0.009375000000000003,
"train_probe_completions/max_length": 2373.3333333333335,
"train_probe_completions/max_terminated_length": 2373.3333333333335,
"train_probe_completions/mean_length": 680.1991678873698,
"train_probe_completions/mean_terminated_length": 686.6373596191406,
"train_probe_completions/min_length": 70.66666666666667,
"train_probe_completions/min_terminated_length": 212.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 318237273.0,
"train_probe_reward": 1.055055598417918,
"train_probe_reward_std": 0.24644359946250916,
"train_probe_rewards/accuracy_reward": 0.6944444477558136,
"train_probe_rewards/brier_reward": 0.8187563320000967,
"train_probe_rewards/confidence_uniqueness_reward": 0.873372862736384,
"train_probe_rewards/format_reward": 0.9904513855775198,
"train_probe_rewards/frontier_aurc_reward": -0.001900765870232135,
"train_probe_rewards/frontier_coverage_0": 0.012389092764351517,
"train_probe_rewards/frontier_coverage_1": 0.012389092764351517,
"train_probe_rewards/frontier_coverage_10": 0.012389092764351517,
"train_probe_rewards/frontier_coverage_15": 0.012389092764351517,
"train_probe_rewards/frontier_coverage_20": 0.014410387520911172,
"train_probe_rewards/frontier_coverage_25": 0.024321939796209335,
"train_probe_rewards/frontier_coverage_5": 0.012389092764351517,
"train_probe_rewards/true_frontier_ece_gap_only_reward": -0.0036154407619809112,
"train_probe_runtime": 188.9198,
"train_probe_samples_per_second": 5.293,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4124348958333333,
"train_probe_signal/accuracy_reward/group_std_mean": 0.45987477401892346,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20621744791666666,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20621744791666666,
"train_probe_signal/advantage_abs_mean": 0.21144999066988626,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.21144999066988626,
"train_probe_signal/advantage_pre_scale_std": 0.24553329994281134,
"train_probe_signal/advantage_std": 0.24553329994281134,
"train_probe_signal/brier_reward/centered_abs_mean": 0.17376654346783957,
"train_probe_signal/brier_reward/group_std_mean": 0.2235363299647967,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021720817933479946,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.021720817933479946,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.060595336059729256,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09015070833265781,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007574417007466157,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007574417007466157,
"train_probe_signal/format_reward/centered_abs_mean": 0.018283420087148745,
"train_probe_signal/format_reward/group_std_mean": 0.04803628505518039,
"train_probe_signal/format_reward/group_zero_std_frac": 0.7500000149011612,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009141710043574372,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.009141710043574372,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.003131849652466675,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0059242877177894115,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8935150819791794e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.015625,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8935150819791794e-05,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.16411924362182617,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.25143779317537945,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_0/weight": 0.015625,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.16411924362182617,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.25143779317537945,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_1/weight": 0.015625,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.16411924362182617,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.25143779317537945,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_10/weight": 0.015625,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.16411924362182617,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.25143779317537945,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_15/weight": 0.015625,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.11266574015220006,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.17991459121306738,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001760402189878126,
"train_probe_signal/frontier_coverage_20/weight": 0.015625,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001760402189878126,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.04643759255607923,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.07232892637451489,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000725587383688738,
"train_probe_signal/frontier_coverage_25/weight": 0.015625,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000725587383688738,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.16411924362182617,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.25143779317537945,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/frontier_coverage_5/weight": 0.015625,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002564363181591034,
"train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0037723184019948044,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.00622099117996792,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00047153980024935055,
"train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00047153980024935055,
"train_probe_steps_per_second": 0.032
},
{
"calibration/aurc": 0.13391643198760078,
"calibration/batch_distribution_entropy": 0.7948606211172603,
"calibration/batch_entropy_100bins": 0.8368200343599981,
"calibration/batch_entropy_10bins": 0.7948606211172603,
"calibration/batch_entropy_50bins": 0.8510265931141483,
"calibration/batch_uniqueness": 0.944000865460584,
"calibration/buffer_distribution_entropy": 0.8213485685365185,
"calibration/buffer_entropy_100bins": 0.8620536929835996,
"calibration/buffer_entropy_10bins": 0.8213485685365185,
"calibration/buffer_entropy_50bins": 0.8727871263863621,
"calibration/confidence_entropy": 0.5393953875867862,
"calibration/coverage@0%": 0.032740440088172104,
"calibration/coverage@1%": 0.032740440088172104,
"calibration/coverage@10%": 0.3944664512557533,
"calibration/coverage@15%": 0.6908698207865707,
"calibration/coverage@20%": 0.7854961519008106,
"calibration/coverage@25%": 0.8895916777165667,
"calibration/coverage@30%": 0.9335078534031414,
"calibration/coverage@5%": 0.24789239883630465,
"calibration/distribution_entropy_10": 0.7948606211172603,
"calibration/distribution_entropy_100": 0.8368200343599981,
"calibration/ece": 0.1253224171661008,
"calibration/mean_confidence": 0.6584433504347348,
"calibration/unique_confidence_per_question": 0.196875,
"calibration/unique_confidences": 75.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111116,
"completions/max_length": 3146.6,
"completions/max_terminated_length": 3146.6,
"completions/mean_length": 669.9478393554688,
"completions/mean_terminated_length": 675.8888916015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.6,
"epoch": 0.3719953500581243,
"grad_norm": 0.0004369099042378366,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0074,
"num_tokens": 329062784.0,
"reward": 1.0875563144683837,
"reward_std": 0.1229211449623108,
"rewards/accuracy_reward": 0.7425347328186035,
"rewards/brier_reward": 0.835495126247406,
"rewards/confidence_uniqueness_reward": 0.9289066553115845,
"rewards/format_reward": 0.9911458492279053,
"rewards/frontier_aurc_reward": -0.0014140044804662466,
"rewards/frontier_coverage_0": -0.001173873944208026,
"rewards/frontier_coverage_1": -0.001173873944208026,
"rewards/frontier_coverage_10": -0.001173873944208026,
"rewards/frontier_coverage_15": -0.001173873944208026,
"rewards/frontier_coverage_20": 0.01185264540836215,
"rewards/frontier_coverage_25": 0.028303157165646554,
"rewards/frontier_coverage_5": -0.001173873944208026,
"rewards/true_frontier_ece_gap_only_reward": -0.0027825822588056328,
"signal/accuracy_reward/centered_abs_mean": 0.16309678852558135,
"signal/accuracy_reward/group_std_mean": 0.21074391305446624,
"signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08154839426279067,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08154839426279067,
"signal/advantage_abs_mean": 0.09126082807779312,
"signal/advantage_pre_scale_abs_mean": 0.09126082807779312,
"signal/advantage_pre_scale_std": 0.16173238456249237,
"signal/advantage_std": 0.16173238456249237,
"signal/brier_reward/centered_abs_mean": 0.10318089425563812,
"signal/brier_reward/group_std_mean": 0.13717953413724898,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012897611781954765,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012897611781954765,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03800181671977043,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05584709048271179,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004750227089971304,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004750227089971304,
"signal/format_reward/centered_abs_mean": 0.014941406436264515,
"signal/format_reward/group_std_mean": 0.02782573737204075,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0074707032181322575,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0074707032181322575,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018860452808439731,
"signal/frontier_aurc_reward/group_std_mean": 0.0032281734980642795,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.946945751318708e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.946945751318708e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10934300273656845,
"signal/frontier_coverage_0/group_std_mean": 0.15232057571411134,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_1/centered_abs_mean": 0.10934300273656845,
"signal/frontier_coverage_1/group_std_mean": 0.15232057571411134,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_10/centered_abs_mean": 0.10934300273656845,
"signal/frontier_coverage_10/group_std_mean": 0.15232057571411134,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_15/centered_abs_mean": 0.10934300273656845,
"signal/frontier_coverage_15/group_std_mean": 0.15232057571411134,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_20/centered_abs_mean": 0.055034608393907544,
"signal/frontier_coverage_20/group_std_mean": 0.08059312552213668,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008599157561548054,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008599157561548054,
"signal/frontier_coverage_25/centered_abs_mean": 0.033925560861825944,
"signal/frontier_coverage_25/group_std_mean": 0.04722090288996696,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005300868884660304,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005300868884660304,
"signal/frontier_coverage_5/centered_abs_mean": 0.10934300273656845,
"signal/frontier_coverage_5/group_std_mean": 0.15232057571411134,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001708484417758882,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001708484417758882,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.002597217308357358,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0038485261145979168,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00032465216354466976,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00032465216354466976,
"step": 155
},
{
"calibration/aurc": 0.15233241534879957,
"calibration/batch_distribution_entropy": 0.7053979650865262,
"calibration/batch_entropy_100bins": 0.7897403240858474,
"calibration/batch_entropy_10bins": 0.7053979650865262,
"calibration/batch_entropy_50bins": 0.7958750116338504,
"calibration/batch_uniqueness": 0.9272095434772781,
"calibration/buffer_distribution_entropy": 0.8133656085702008,
"calibration/buffer_entropy_100bins": 0.8631911273929548,
"calibration/buffer_entropy_10bins": 0.8133656085702008,
"calibration/buffer_entropy_50bins": 0.8712631524302715,
"calibration/confidence_entropy": 0.5216451024089949,
"calibration/coverage@0%": 0.12002745244582987,
"calibration/coverage@1%": 0.20824453289395234,
"calibration/coverage@10%": 0.45039907142423363,
"calibration/coverage@15%": 0.735983934467869,
"calibration/coverage@20%": 0.773753280839895,
"calibration/coverage@25%": 0.7937007874015748,
"calibration/coverage@30%": 0.8,
"calibration/coverage@5%": 0.27139264976610233,
"calibration/distribution_entropy_10": 0.7053979650865262,
"calibration/distribution_entropy_100": 0.7897403240858474,
"calibration/ece": 0.13596159831377114,
"calibration/mean_confidence": 0.7032028452021251,
"calibration/unique_confidence_per_question": 0.17864583333333334,
"calibration/unique_confidences": 68.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666674,
"completions/max_length": 3056.6,
"completions/max_terminated_length": 3056.6,
"completions/mean_length": 679.8614624023437,
"completions/mean_terminated_length": 688.1098266601563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.38399520005999926,
"grad_norm": 0.0003775613440666348,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0102,
"num_tokens": 339982084.0,
"reward": 1.0505271673202514,
"reward_std": 0.1207915186882019,
"rewards/accuracy_reward": 0.677256953716278,
"rewards/brier_reward": 0.8067374706268311,
"rewards/confidence_uniqueness_reward": 0.9252258539199829,
"rewards/format_reward": 0.9880208373069763,
"rewards/frontier_aurc_reward": -0.0025012485682964327,
"rewards/frontier_coverage_0": 0.012539402535185217,
"rewards/frontier_coverage_1": 0.012539402535185217,
"rewards/frontier_coverage_10": 0.012539402535185217,
"rewards/frontier_coverage_15": 0.013295956503134221,
"rewards/frontier_coverage_20": 0.016371296532452107,
"rewards/frontier_coverage_25": 0.02971927933394909,
"rewards/frontier_coverage_5": 0.012539402535185217,
"rewards/true_frontier_ece_gap_only_reward": -0.0022376260720193388,
"signal/accuracy_reward/centered_abs_mean": 0.1533745676279068,
"signal/accuracy_reward/group_std_mean": 0.1958913177251816,
"signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0766872838139534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0766872838139534,
"signal/advantage_abs_mean": 0.09020575135946274,
"signal/advantage_pre_scale_abs_mean": 0.09020575135946274,
"signal/advantage_pre_scale_std": 0.16270052194595336,
"signal/advantage_std": 0.16270052194595336,
"signal/brier_reward/centered_abs_mean": 0.10894776731729508,
"signal/brier_reward/group_std_mean": 0.1400896966457367,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013618470914661885,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013618470914661885,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04274830222129822,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06329518854618073,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005343537777662277,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005343537777662277,
"signal/format_reward/centered_abs_mean": 0.019791666604578494,
"signal/format_reward/group_std_mean": 0.03536950312554836,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009895833302289247,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009895833302289247,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028245057445019485,
"signal/frontier_aurc_reward/group_std_mean": 0.004615729767829179,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4132902257842946e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4132902257842946e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.11053884625434876,
"signal/frontier_coverage_0/group_std_mean": 0.1505269557237625,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_1/centered_abs_mean": 0.11053884625434876,
"signal/frontier_coverage_1/group_std_mean": 0.1505269557237625,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_10/centered_abs_mean": 0.11053884625434876,
"signal/frontier_coverage_10/group_std_mean": 0.1505269557237625,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_15/centered_abs_mean": 0.09622148275375367,
"signal/frontier_coverage_15/group_std_mean": 0.13204507827758788,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001503460668027401,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001503460668027401,
"signal/frontier_coverage_20/centered_abs_mean": 0.04334339499473572,
"signal/frontier_coverage_20/group_std_mean": 0.06285871043801308,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006772405467927456,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006772405467927456,
"signal/frontier_coverage_25/centered_abs_mean": 0.036124877631664276,
"signal/frontier_coverage_25/group_std_mean": 0.04845571741461754,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005644512129947543,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005644512129947543,
"signal/frontier_coverage_5/centered_abs_mean": 0.11053884625434876,
"signal/frontier_coverage_5/group_std_mean": 0.1505269557237625,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017271694727241994,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017271694727241994,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0021982237696647642,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.00320082139223814,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00027477797120809553,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00027477797120809553,
"step": 160
},
{
"calibration/aurc": 0.18058905327825436,
"calibration/batch_distribution_entropy": 0.7767610930080233,
"calibration/batch_entropy_100bins": 0.828420695410555,
"calibration/batch_entropy_10bins": 0.7767610930080233,
"calibration/batch_entropy_50bins": 0.8396945215174878,
"calibration/batch_uniqueness": 0.9377851813118999,
"calibration/buffer_distribution_entropy": 0.8137825525481095,
"calibration/buffer_entropy_100bins": 0.8667738672543628,
"calibration/buffer_entropy_10bins": 0.8137825525481095,
"calibration/buffer_entropy_50bins": 0.873905604077686,
"calibration/confidence_entropy": 0.5159103697988725,
"calibration/coverage@0%": 0.0042729555744199215,
"calibration/coverage@1%": 0.0042729555744199215,
"calibration/coverage@10%": 0.41248231465761176,
"calibration/coverage@15%": 0.4753775820599887,
"calibration/coverage@20%": 0.532258064516129,
"calibration/coverage@25%": 0.7172935454271994,
"calibration/coverage@30%": 0.8444380428056248,
"calibration/coverage@5%": 0.21247736276174306,
"calibration/distribution_entropy_10": 0.7767610930080233,
"calibration/distribution_entropy_100": 0.828420695410555,
"calibration/ece": 0.12461592848485974,
"calibration/mean_confidence": 0.6432186546021172,
"calibration/unique_confidence_per_question": 0.20104166666666665,
"calibration/unique_confidences": 77.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01362847222222221,
"completions/max_length": 3466.0,
"completions/max_terminated_length": 3466.0,
"completions/mean_length": 714.0564208984375,
"completions/mean_terminated_length": 724.0512939453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.6,
"epoch": 0.39599505006187424,
"grad_norm": 0.0003885479236487299,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.0108,
"num_tokens": 351347086.0,
"reward": 1.0483879327774048,
"reward_std": 0.12390840351581574,
"rewards/accuracy_reward": 0.6697048664093017,
"rewards/brier_reward": 0.8130905270576477,
"rewards/confidence_uniqueness_reward": 0.9271872401237488,
"rewards/format_reward": 0.9863715410232544,
"rewards/frontier_aurc_reward": -0.002153858123347163,
"rewards/frontier_coverage_0": 0.028279137797653675,
"rewards/frontier_coverage_1": 0.028279137797653675,
"rewards/frontier_coverage_10": 0.028279137797653675,
"rewards/frontier_coverage_15": 0.028847700357437132,
"rewards/frontier_coverage_20": 0.02469187043607235,
"rewards/frontier_coverage_25": 0.03973658010363579,
"rewards/frontier_coverage_5": 0.028279137797653675,
"rewards/true_frontier_ece_gap_only_reward": -0.0030099464114755393,
"signal/accuracy_reward/centered_abs_mean": 0.14902886599302292,
"signal/accuracy_reward/group_std_mean": 0.19532329142093657,
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07451443299651146,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07451443299651146,
"signal/advantage_abs_mean": 0.09079683572053909,
"signal/advantage_pre_scale_abs_mean": 0.09079683572053909,
"signal/advantage_pre_scale_std": 0.16342334747314452,
"signal/advantage_std": 0.16342334747314452,
"signal/brier_reward/centered_abs_mean": 0.11507419794797898,
"signal/brier_reward/group_std_mean": 0.15091157853603362,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014384274743497372,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014384274743497372,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04255444556474686,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06380771696567536,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005319305695593357,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005319305695593357,
"signal/format_reward/centered_abs_mean": 0.022292751632630826,
"signal/format_reward/group_std_mean": 0.03928981348872185,
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011146375816315413,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011146375816315413,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026308648753911256,
"signal/frontier_aurc_reward/group_std_mean": 0.0045830888208001856,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.110726367798634e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.110726367798634e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12479231059551239,
"signal/frontier_coverage_0/group_std_mean": 0.16951032280921935,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_1/centered_abs_mean": 0.12479231059551239,
"signal/frontier_coverage_1/group_std_mean": 0.16951032280921935,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_10/centered_abs_mean": 0.12479231059551239,
"signal/frontier_coverage_10/group_std_mean": 0.16951032280921935,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_15/centered_abs_mean": 0.09972788542509078,
"signal/frontier_coverage_15/group_std_mean": 0.13676573038101197,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015582482097670435,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015582482097670435,
"signal/frontier_coverage_20/centered_abs_mean": 0.05088546723127365,
"signal/frontier_coverage_20/group_std_mean": 0.07109279409050942,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007950854254886508,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007950854254886508,
"signal/frontier_coverage_25/centered_abs_mean": 0.04205540716648102,
"signal/frontier_coverage_25/group_std_mean": 0.05501595437526703,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006571157369762659,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006571157369762659,
"signal/frontier_coverage_5/centered_abs_mean": 0.12479231059551239,
"signal/frontier_coverage_5/group_std_mean": 0.16951032280921935,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019498798530548811,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019498798530548811,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0027308772783726453,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0037563166581094263,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00034135965979658066,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00034135965979658066,
"step": 165
},
{
"calibration/aurc": 0.14844663172603637,
"calibration/batch_distribution_entropy": 0.6995723211429985,
"calibration/batch_entropy_100bins": 0.7860955102263528,
"calibration/batch_entropy_10bins": 0.6995723211429985,
"calibration/batch_entropy_50bins": 0.7898930908087898,
"calibration/batch_uniqueness": 0.9239117880487016,
"calibration/buffer_distribution_entropy": 0.8153715309967344,
"calibration/buffer_entropy_100bins": 0.8700450976408975,
"calibration/buffer_entropy_10bins": 0.8153715309967344,
"calibration/buffer_entropy_50bins": 0.8761398327730963,
"calibration/confidence_entropy": 0.511929929827141,
"calibration/coverage@0%": 0.014253398536593877,
"calibration/coverage@1%": 0.014253398536593877,
"calibration/coverage@10%": 0.3627433108172956,
"calibration/coverage@15%": 0.650761552543238,
"calibration/coverage@20%": 0.7580184572251633,
"calibration/coverage@25%": 0.9351347617666155,
"calibration/coverage@30%": 0.9687830687830689,
"calibration/coverage@5%": 0.014253398536593877,
"calibration/distribution_entropy_10": 0.6995723211429985,
"calibration/distribution_entropy_100": 0.7860955102263528,
"calibration/ece": 0.09246575064390247,
"calibration/mean_confidence": 0.7047365129909607,
"calibration/unique_confidence_per_question": 0.1796875,
"calibration/unique_confidences": 69.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009809027777777767,
"completions/max_length": 3302.6,
"completions/max_terminated_length": 3302.6,
"completions/mean_length": 686.3215209960938,
"completions/mean_terminated_length": 693.1324462890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.6,
"epoch": 0.4079949000637492,
"grad_norm": 0.0003591932763811201,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0087,
"num_tokens": 362342694.0,
"reward": 1.0759626150131225,
"reward_std": 0.12116758078336716,
"rewards/accuracy_reward": 0.7206597208976746,
"rewards/brier_reward": 0.8285124659538269,
"rewards/confidence_uniqueness_reward": 0.9224372029304504,
"rewards/format_reward": 0.9901909828186035,
"rewards/frontier_aurc_reward": -0.002220888831652701,
"rewards/frontier_coverage_0": 0.010648279171437024,
"rewards/frontier_coverage_1": 0.010648279171437024,
"rewards/frontier_coverage_10": 0.010648279171437024,
"rewards/frontier_coverage_15": 0.015687369927763938,
"rewards/frontier_coverage_20": 0.02136296220123768,
"rewards/frontier_coverage_25": 0.0481999009847641,
"rewards/frontier_coverage_5": 0.010648279171437024,
"rewards/true_frontier_ece_gap_only_reward": -0.002354492200538516,
"signal/accuracy_reward/centered_abs_mean": 0.14618055820465087,
"signal/accuracy_reward/group_std_mean": 0.19709926843643188,
"signal/accuracy_reward/group_zero_std_frac": 0.42777777314186094,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07309027910232543,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07309027910232543,
"signal/advantage_abs_mean": 0.08560824990272523,
"signal/advantage_pre_scale_abs_mean": 0.08560824990272523,
"signal/advantage_pre_scale_std": 0.1597005158662796,
"signal/advantage_std": 0.1597005158662796,
"signal/brier_reward/centered_abs_mean": 0.10978586375713348,
"signal/brier_reward/group_std_mean": 0.14447366297245026,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013723232969641685,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013723232969641685,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04156898036599159,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06315687522292138,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005196122545748949,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005196122545748949,
"signal/format_reward/centered_abs_mean": 0.017355685867369176,
"signal/format_reward/group_std_mean": 0.03438038341701031,
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008677842933684588,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008677842933684588,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002847391273826361,
"signal/frontier_aurc_reward/group_std_mean": 0.004856492578983307,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.449048865353689e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.449048865353689e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10908669680356979,
"signal/frontier_coverage_0/group_std_mean": 0.1511477291584015,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_1/centered_abs_mean": 0.10908669680356979,
"signal/frontier_coverage_1/group_std_mean": 0.1511477291584015,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_10/centered_abs_mean": 0.10908669680356979,
"signal/frontier_coverage_10/group_std_mean": 0.1511477291584015,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_15/centered_abs_mean": 0.08291138708591461,
"signal/frontier_coverage_15/group_std_mean": 0.11675856113433838,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012954904232174158,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012954904232174158,
"signal/frontier_coverage_20/centered_abs_mean": 0.0416046604514122,
"signal/frontier_coverage_20/group_std_mean": 0.059697122871875764,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006500728195533156,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006500728195533156,
"signal/frontier_coverage_25/centered_abs_mean": 0.041713655740022657,
"signal/frontier_coverage_25/group_std_mean": 0.05425269529223442,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000651775870937854,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000651775870937854,
"signal/frontier_coverage_5/centered_abs_mean": 0.10908669680356979,
"signal/frontier_coverage_5/group_std_mean": 0.1511477291584015,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001704479637555778,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001704479637555778,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0024256525095552204,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0035729790572077034,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00030320656369440255,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00030320656369440255,
"step": 170
},
{
"calibration/aurc": 0.14461688721979393,
"calibration/batch_distribution_entropy": 0.7800736712882704,
"calibration/batch_entropy_100bins": 0.8300042856724333,
"calibration/batch_entropy_10bins": 0.7800736712882704,
"calibration/batch_entropy_50bins": 0.8404826975612186,
"calibration/batch_uniqueness": 0.938196954829919,
"calibration/buffer_distribution_entropy": 0.8152604036836385,
"calibration/buffer_entropy_100bins": 0.8717666012501055,
"calibration/buffer_entropy_10bins": 0.8152604036836385,
"calibration/buffer_entropy_50bins": 0.8767388825784943,
"calibration/confidence_entropy": 0.5208488747591534,
"calibration/coverage@0%": 0.012662042165713209,
"calibration/coverage@1%": 0.012662042165713209,
"calibration/coverage@10%": 0.3192511394437657,
"calibration/coverage@15%": 0.5831173985414801,
"calibration/coverage@20%": 0.8100232338874823,
"calibration/coverage@25%": 0.9231852982671059,
"calibration/coverage@30%": 0.9916449086161879,
"calibration/coverage@5%": 0.1479950273702389,
"calibration/distribution_entropy_10": 0.7800736712882704,
"calibration/distribution_entropy_100": 0.8300042856724333,
"calibration/ece": 0.0991099850213959,
"calibration/mean_confidence": 0.6590511411080155,
"calibration/unique_confidence_per_question": 0.20572916666666666,
"calibration/unique_confidences": 79.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011197916666666674,
"completions/max_length": 3595.2,
"completions/max_terminated_length": 3595.2,
"completions/mean_length": 714.6521850585938,
"completions/mean_terminated_length": 722.7832885742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.0,
"epoch": 0.4199947500656242,
"grad_norm": 0.0004064071399625391,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0108,
"num_tokens": 373683455.0,
"reward": 1.0710617542266845,
"reward_std": 0.12647038847208023,
"rewards/accuracy_reward": 0.7127604246139526,
"rewards/brier_reward": 0.823624587059021,
"rewards/confidence_uniqueness_reward": 0.9248634934425354,
"rewards/format_reward": 0.9888020873069763,
"rewards/frontier_aurc_reward": -0.0020120171364396812,
"rewards/frontier_coverage_0": 0.01165504176169634,
"rewards/frontier_coverage_1": 0.01165504176169634,
"rewards/frontier_coverage_10": 0.01165504176169634,
"rewards/frontier_coverage_15": 0.015564435138367116,
"rewards/frontier_coverage_20": 0.021842183917760848,
"rewards/frontier_coverage_25": 0.051497886329889296,
"rewards/frontier_coverage_5": 0.01165504176169634,
"rewards/true_frontier_ece_gap_only_reward": -0.0029332443606108426,
"signal/accuracy_reward/centered_abs_mean": 0.15782877802848816,
"signal/accuracy_reward/group_std_mean": 0.21180022656917571,
"signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07891438901424408,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07891438901424408,
"signal/advantage_abs_mean": 0.0904485046863556,
"signal/advantage_pre_scale_abs_mean": 0.0904485046863556,
"signal/advantage_pre_scale_std": 0.16463069915771483,
"signal/advantage_std": 0.16463069915771483,
"signal/brier_reward/centered_abs_mean": 0.11573301851749421,
"signal/brier_reward/group_std_mean": 0.1542545437812805,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014466627314686776,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014466627314686776,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042830513417720796,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06403593942523003,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0053538141772150995,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0053538141772150995,
"signal/format_reward/centered_abs_mean": 0.01941731758415699,
"signal/format_reward/group_std_mean": 0.03575590215623379,
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009708658792078495,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009708658792078495,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027021993417292835,
"signal/frontier_aurc_reward/group_std_mean": 0.00471522705629468,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2221864714520055e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2221864714520055e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12408257275819778,
"signal/frontier_coverage_0/group_std_mean": 0.173751300573349,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_1/centered_abs_mean": 0.12408257275819778,
"signal/frontier_coverage_1/group_std_mean": 0.173751300573349,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_10/centered_abs_mean": 0.12408257275819778,
"signal/frontier_coverage_10/group_std_mean": 0.173751300573349,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_15/centered_abs_mean": 0.09372715502977372,
"signal/frontier_coverage_15/group_std_mean": 0.13314026296138765,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014644867973402143,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014644867973402143,
"signal/frontier_coverage_20/centered_abs_mean": 0.04657027423381806,
"signal/frontier_coverage_20/group_std_mean": 0.0670913964509964,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007276605349034071,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007276605349034071,
"signal/frontier_coverage_25/centered_abs_mean": 0.04459658488631248,
"signal/frontier_coverage_25/group_std_mean": 0.05846061035990715,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006968216388486326,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006968216388486326,
"signal/frontier_coverage_5/centered_abs_mean": 0.12408257275819778,
"signal/frontier_coverage_5/group_std_mean": 0.173751300573349,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019387901993468404,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019387901993468404,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0028843230567872523,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0041553780902177095,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00036054038209840653,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00036054038209840653,
"step": 175
},
{
"calibration/aurc": 0.10250645380777204,
"calibration/batch_distribution_entropy": 0.7497535770739696,
"calibration/batch_entropy_100bins": 0.8134197295262965,
"calibration/batch_entropy_10bins": 0.7497535770739696,
"calibration/batch_entropy_50bins": 0.8202766103654543,
"calibration/batch_uniqueness": 0.9356761553909309,
"calibration/buffer_distribution_entropy": 0.8144947209468967,
"calibration/buffer_entropy_100bins": 0.8727833543328449,
"calibration/buffer_entropy_10bins": 0.8144947209468967,
"calibration/buffer_entropy_50bins": 0.876997242538901,
"calibration/confidence_entropy": 0.5126060799233956,
"calibration/coverage@0%": 0.013113817192110735,
"calibration/coverage@1%": 0.013113817192110735,
"calibration/coverage@10%": 0.5760200171226603,
"calibration/coverage@15%": 0.8476765097999672,
"calibration/coverage@20%": 0.9179676956091463,
"calibration/coverage@25%": 0.9608355091383812,
"calibration/coverage@30%": 0.981201044386423,
"calibration/coverage@5%": 0.24902806695100796,
"calibration/distribution_entropy_10": 0.7497535770739696,
"calibration/distribution_entropy_100": 0.8134197295262965,
"calibration/ece": 0.12372607921773937,
"calibration/mean_confidence": 0.6789990075283177,
"calibration/unique_confidence_per_question": 0.19635416666666666,
"calibration/unique_confidences": 75.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555537,
"completions/max_length": 3533.2,
"completions/max_terminated_length": 3533.2,
"completions/mean_length": 687.0453247070312,
"completions/mean_terminated_length": 694.14697265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.8,
"epoch": 0.4319946000674992,
"grad_norm": 0.0003923497861251235,
"learning_rate": 8.433734939759036e-07,
"loss": -0.009,
"num_tokens": 384698185.0,
"reward": 1.0713138580322266,
"reward_std": 0.12391123622655868,
"rewards/accuracy_reward": 0.7142361044883728,
"rewards/brier_reward": 0.8178089022636413,
"rewards/confidence_uniqueness_reward": 0.9249926686286927,
"rewards/format_reward": 0.9896701335906982,
"rewards/frontier_aurc_reward": -0.0023395067546516657,
"rewards/frontier_coverage_0": 0.009305649372981862,
"rewards/frontier_coverage_1": 0.009305649372981862,
"rewards/frontier_coverage_10": 0.009305649372981862,
"rewards/frontier_coverage_15": 0.013631703774444759,
"rewards/frontier_coverage_20": 0.022413885779678823,
"rewards/frontier_coverage_25": 0.05289755538105965,
"rewards/frontier_coverage_5": 0.009305649372981862,
"rewards/true_frontier_ece_gap_only_reward": -0.0033940633293241262,
"signal/accuracy_reward/centered_abs_mean": 0.1560980886220932,
"signal/accuracy_reward/group_std_mean": 0.20446191132068633,
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0780490443110466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0780490443110466,
"signal/advantage_abs_mean": 0.09040587842464447,
"signal/advantage_pre_scale_abs_mean": 0.09040587842464447,
"signal/advantage_pre_scale_std": 0.1639217257499695,
"signal/advantage_std": 0.1639217257499695,
"signal/brier_reward/centered_abs_mean": 0.1156775861978531,
"signal/brier_reward/group_std_mean": 0.15296037197113038,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014459698274731637,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014459698274731637,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04092123620212078,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06180307194590569,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005115154525265097,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005115154525265097,
"signal/format_reward/centered_abs_mean": 0.017681206576526165,
"signal/format_reward/group_std_mean": 0.03354543596506119,
"signal/format_reward/group_zero_std_frac": 0.8611111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008840603288263083,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008840603288263083,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002855647308751941,
"signal/frontier_aurc_reward/group_std_mean": 0.004864505957812071,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4619489199249077e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4619489199249077e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12355190813541413,
"signal/frontier_coverage_0/group_std_mean": 0.16903219521045684,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_1/centered_abs_mean": 0.12355190813541413,
"signal/frontier_coverage_1/group_std_mean": 0.16903219521045684,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_10/centered_abs_mean": 0.12355190813541413,
"signal/frontier_coverage_10/group_std_mean": 0.16903219521045684,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_15/centered_abs_mean": 0.08319898694753647,
"signal/frontier_coverage_15/group_std_mean": 0.11640357077121735,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012999841710552573,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012999841710552573,
"signal/frontier_coverage_20/centered_abs_mean": 0.04654121547937393,
"signal/frontier_coverage_20/group_std_mean": 0.06476506888866425,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007272064918652177,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007272064918652177,
"signal/frontier_coverage_25/centered_abs_mean": 0.048268646001815796,
"signal/frontier_coverage_25/group_std_mean": 0.06180224493145943,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007541975937783718,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007541975937783718,
"signal/frontier_coverage_5/centered_abs_mean": 0.12355190813541413,
"signal/frontier_coverage_5/group_std_mean": 0.16903219521045684,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019304985646158457,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019304985646158457,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0031099628657102587,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004470847826451063,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00038874535821378233,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00038874535821378233,
"step": 180
},
{
"calibration/aurc": 0.16988037415763974,
"calibration/batch_distribution_entropy": 0.7940710514827269,
"calibration/batch_entropy_100bins": 0.8368481344782455,
"calibration/batch_entropy_10bins": 0.7940710514827269,
"calibration/batch_entropy_50bins": 0.8476841669994413,
"calibration/batch_uniqueness": 0.9411219051737953,
"calibration/buffer_distribution_entropy": 0.8081097969439645,
"calibration/buffer_entropy_100bins": 0.8695779315744969,
"calibration/buffer_entropy_10bins": 0.8081097969439645,
"calibration/buffer_entropy_50bins": 0.873141871107264,
"calibration/confidence_entropy": 0.5195127592582154,
"calibration/coverage@0%": 0.022429873118014296,
"calibration/coverage@1%": 0.022429873118014296,
"calibration/coverage@10%": 0.15776704580497936,
"calibration/coverage@15%": 0.47283910007332636,
"calibration/coverage@20%": 0.8386187835365227,
"calibration/coverage@25%": 0.8956120693790559,
"calibration/coverage@30%": 0.9506561679790027,
"calibration/coverage@5%": 0.044304873118014294,
"calibration/distribution_entropy_10": 0.7940710514827269,
"calibration/distribution_entropy_100": 0.8368481344782455,
"calibration/ece": 0.11371638934936182,
"calibration/mean_confidence": 0.6561408147419053,
"calibration/unique_confidence_per_question": 0.2109375,
"calibration/unique_confidences": 81.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00980902777777779,
"completions/max_length": 3568.8,
"completions/max_terminated_length": 3568.8,
"completions/mean_length": 694.918212890625,
"completions/mean_terminated_length": 701.8487915039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.8,
"epoch": 0.44399445006937416,
"grad_norm": 0.0004112598253414035,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0072,
"num_tokens": 395793691.0,
"reward": 1.0613837242126465,
"reward_std": 0.12879109233617783,
"rewards/accuracy_reward": 0.692187488079071,
"rewards/brier_reward": 0.8143394708633422,
"rewards/confidence_uniqueness_reward": 0.9319449424743652,
"rewards/format_reward": 0.9901041746139526,
"rewards/frontier_aurc_reward": -0.0021198054775595663,
"rewards/frontier_coverage_0": 0.01427230816334486,
"rewards/frontier_coverage_1": 0.01427230816334486,
"rewards/frontier_coverage_10": 0.01427230816334486,
"rewards/frontier_coverage_15": 0.016747461259365083,
"rewards/frontier_coverage_20": 0.02341715954244137,
"rewards/frontier_coverage_25": 0.05537489578127861,
"rewards/frontier_coverage_5": 0.01427230816334486,
"rewards/true_frontier_ece_gap_only_reward": -0.003194707864895463,
"signal/accuracy_reward/centered_abs_mean": 0.16829426884651183,
"signal/accuracy_reward/group_std_mean": 0.2190181851387024,
"signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08414713442325591,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08414713442325591,
"signal/advantage_abs_mean": 0.09635478109121323,
"signal/advantage_pre_scale_abs_mean": 0.09635478109121323,
"signal/advantage_pre_scale_std": 0.1639193296432495,
"signal/advantage_std": 0.1639193296432495,
"signal/brier_reward/centered_abs_mean": 0.12017861008644104,
"signal/brier_reward/group_std_mean": 0.15563611090183258,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01502232626080513,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01502232626080513,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037514998018741606,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05578840374946594,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004689374752342701,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004689374752342701,
"signal/format_reward/centered_abs_mean": 0.016894531436264515,
"signal/format_reward/group_std_mean": 0.030554963275790215,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008447265718132257,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008447265718132257,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002720799436792731,
"signal/frontier_aurc_reward/group_std_mean": 0.004833174217492342,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2512491199886425e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2512491199886425e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.130600044131279,
"signal/frontier_coverage_0/group_std_mean": 0.18123140037059784,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_1/centered_abs_mean": 0.130600044131279,
"signal/frontier_coverage_1/group_std_mean": 0.18123140037059784,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_10/centered_abs_mean": 0.130600044131279,
"signal/frontier_coverage_10/group_std_mean": 0.18123140037059784,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_15/centered_abs_mean": 0.08408356457948685,
"signal/frontier_coverage_15/group_std_mean": 0.11986269503831863,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001313805696554482,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001313805696554482,
"signal/frontier_coverage_20/centered_abs_mean": 0.0467585064470768,
"signal/frontier_coverage_20/group_std_mean": 0.06627060770988465,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000730601663235575,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000730601663235575,
"signal/frontier_coverage_25/centered_abs_mean": 0.04904806688427925,
"signal/frontier_coverage_25/group_std_mean": 0.06319007501006127,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007663760450668633,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007663760450668633,
"signal/frontier_coverage_5/centered_abs_mean": 0.130600044131279,
"signal/frontier_coverage_5/group_std_mean": 0.18123140037059784,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002040625689551234,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002040625689551234,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.003226568968966603,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004763441625982523,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004033211211208254,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004033211211208254,
"step": 185
},
{
"calibration/aurc": 0.16887821111438306,
"calibration/batch_distribution_entropy": 0.7166309619833362,
"calibration/batch_entropy_100bins": 0.793829835616982,
"calibration/batch_entropy_10bins": 0.7166309619833362,
"calibration/batch_entropy_50bins": 0.7992505076218754,
"calibration/batch_uniqueness": 0.9261153678295246,
"calibration/buffer_distribution_entropy": 0.800176021818779,
"calibration/buffer_entropy_100bins": 0.8647032984957514,
"calibration/buffer_entropy_10bins": 0.800176021818779,
"calibration/buffer_entropy_50bins": 0.8680643724938966,
"calibration/confidence_entropy": 0.4969127807971727,
"calibration/coverage@0%": 0.013662280701754386,
"calibration/coverage@1%": 0.013662280701754386,
"calibration/coverage@10%": 0.38303179824561406,
"calibration/coverage@15%": 0.5142105263157895,
"calibration/coverage@20%": 0.5489473684210526,
"calibration/coverage@25%": 0.938843201754386,
"calibration/coverage@30%": 0.9942105263157895,
"calibration/coverage@5%": 0.0999780701754386,
"calibration/distribution_entropy_10": 0.7166309619833362,
"calibration/distribution_entropy_100": 0.793829835616982,
"calibration/ece": 0.11243645289887176,
"calibration/mean_confidence": 0.6973970449081458,
"calibration/unique_confidence_per_question": 0.18958333333333335,
"calibration/unique_confidences": 72.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00512152777777779,
"completions/max_length": 3413.8,
"completions/max_terminated_length": 3413.8,
"completions/mean_length": 683.5370727539063,
"completions/mean_terminated_length": 687.0613037109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.6,
"epoch": 0.45599430007124914,
"grad_norm": 0.0003837902913801372,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0035,
"num_tokens": 406750982.0,
"reward": 1.0861162424087525,
"reward_std": 0.11900279521942139,
"rewards/accuracy_reward": 0.7318576335906982,
"rewards/brier_reward": 0.8348490834236145,
"rewards/confidence_uniqueness_reward": 0.92904052734375,
"rewards/format_reward": 0.9947916746139527,
"rewards/frontier_aurc_reward": -0.0019866148009896278,
"rewards/frontier_coverage_0": 0.013782516145147384,
"rewards/frontier_coverage_1": 0.013782516145147384,
"rewards/frontier_coverage_10": 0.014211940788663923,
"rewards/frontier_coverage_15": 0.018956656288355588,
"rewards/frontier_coverage_20": 0.02874315045773983,
"rewards/frontier_coverage_25": 0.07212998867034912,
"rewards/frontier_coverage_5": 0.013782516145147384,
"rewards/true_frontier_ece_gap_only_reward": -0.003232320211827755,
"signal/accuracy_reward/centered_abs_mean": 0.16196288764476777,
"signal/accuracy_reward/group_std_mean": 0.21733182072639465,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08098144382238388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08098144382238388,
"signal/advantage_abs_mean": 0.08545112162828446,
"signal/advantage_pre_scale_abs_mean": 0.08545112162828446,
"signal/advantage_pre_scale_std": 0.15151307582855225,
"signal/advantage_std": 0.15151307582855225,
"signal/brier_reward/centered_abs_mean": 0.10914837270975113,
"signal/brier_reward/group_std_mean": 0.14498610198497772,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01364354658871889,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01364354658871889,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03477521277964115,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05167415216565132,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004346901597455144,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004346901597455144,
"signal/format_reward/centered_abs_mean": 0.009429253498092293,
"signal/format_reward/group_std_mean": 0.021391174383461477,
"signal/format_reward/group_zero_std_frac": 0.8972222208976746,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004714626749046147,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004714626749046147,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025213475339114664,
"signal/frontier_aurc_reward/group_std_mean": 0.004476012662053108,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.939605521736666e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.939605521736666e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13185926526784897,
"signal/frontier_coverage_0/group_std_mean": 0.18062789738178253,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00206030101981014,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00206030101981014,
"signal/frontier_coverage_1/centered_abs_mean": 0.13185926526784897,
"signal/frontier_coverage_1/group_std_mean": 0.18062789738178253,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00206030101981014,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00206030101981014,
"signal/frontier_coverage_10/centered_abs_mean": 0.12977752983570098,
"signal/frontier_coverage_10/group_std_mean": 0.1778223305940628,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002027773903682828,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002027773903682828,
"signal/frontier_coverage_15/centered_abs_mean": 0.08260580152273178,
"signal/frontier_coverage_15/group_std_mean": 0.11576226651668549,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001290715648792684,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001290715648792684,
"signal/frontier_coverage_20/centered_abs_mean": 0.04762231633067131,
"signal/frontier_coverage_20/group_std_mean": 0.06589499711990357,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007440986926667392,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007440986926667392,
"signal/frontier_coverage_25/centered_abs_mean": 0.05049858167767525,
"signal/frontier_coverage_25/group_std_mean": 0.06524901390075684,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007890403387136758,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007890403387136758,
"signal/frontier_coverage_5/centered_abs_mean": 0.13185926526784897,
"signal/frontier_coverage_5/group_std_mean": 0.18062789738178253,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00206030101981014,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00206030101981014,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.003113184357061982,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004526341799646616,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00038914804463274777,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00038914804463274777,
"step": 190
},
{
"calibration/aurc": 0.2062296307283377,
"calibration/batch_distribution_entropy": 0.8093306883536086,
"calibration/batch_entropy_100bins": 0.8432700346966481,
"calibration/batch_entropy_10bins": 0.8093306883536086,
"calibration/batch_entropy_50bins": 0.859267476518579,
"calibration/batch_uniqueness": 0.9426119233178885,
"calibration/buffer_distribution_entropy": 0.7945642543079567,
"calibration/buffer_entropy_100bins": 0.8605465543658124,
"calibration/buffer_entropy_10bins": 0.7945642543079567,
"calibration/buffer_entropy_50bins": 0.8645292173196768,
"calibration/confidence_entropy": 0.5176270953297042,
"calibration/coverage@0%": 0.004699738903394255,
"calibration/coverage@1%": 0.004699738903394255,
"calibration/coverage@10%": 0.28507615891758575,
"calibration/coverage@15%": 0.4576479260387029,
"calibration/coverage@20%": 0.5506327631461108,
"calibration/coverage@25%": 0.617482242623319,
"calibration/coverage@30%": 0.772239810615784,
"calibration/coverage@5%": 0.09712793733681462,
"calibration/distribution_entropy_10": 0.8093306883536086,
"calibration/distribution_entropy_100": 0.8432700346966481,
"calibration/ece": 0.13137105508504543,
"calibration/mean_confidence": 0.6472678487210273,
"calibration/unique_confidence_per_question": 0.21197916666666666,
"calibration/unique_confidences": 81.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011545138888888884,
"completions/max_length": 3631.2,
"completions/max_terminated_length": 3631.2,
"completions/mean_length": 703.418408203125,
"completions/mean_terminated_length": 711.7278564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.46799415007312406,
"grad_norm": 0.00042763527017086744,
"learning_rate": 3.91566265060241e-07,
"loss": -0.011,
"num_tokens": 417935226.0,
"reward": 1.0512622594833374,
"reward_std": 0.12501877546310425,
"rewards/accuracy_reward": 0.6756076455116272,
"rewards/brier_reward": 0.8063122510910035,
"rewards/confidence_uniqueness_reward": 0.9281278252601624,
"rewards/format_reward": 0.9883680462837219,
"rewards/frontier_aurc_reward": -0.0024975771084427834,
"rewards/frontier_coverage_0": 0.018676279671490194,
"rewards/frontier_coverage_1": 0.018676279671490194,
"rewards/frontier_coverage_10": 0.01876285169273615,
"rewards/frontier_coverage_15": 0.0209655387327075,
"rewards/frontier_coverage_20": 0.02632690779864788,
"rewards/frontier_coverage_25": 0.06297426149249077,
"rewards/frontier_coverage_5": 0.018676279671490194,
"rewards/true_frontier_ece_gap_only_reward": -0.0030649449676275254,
"signal/accuracy_reward/centered_abs_mean": 0.156504987180233,
"signal/accuracy_reward/group_std_mean": 0.20543068647384644,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0782524935901165,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0782524935901165,
"signal/advantage_abs_mean": 0.09245173931121826,
"signal/advantage_pre_scale_abs_mean": 0.09245173931121826,
"signal/advantage_pre_scale_std": 0.1626460701227188,
"signal/advantage_std": 0.1626460701227188,
"signal/brier_reward/centered_abs_mean": 0.1210327297449112,
"signal/brier_reward/group_std_mean": 0.15611167550086974,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0151290912181139,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0151290912181139,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04194310083985329,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06149864494800568,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005242887604981661,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005242887604981661,
"signal/format_reward/centered_abs_mean": 0.01963975690305233,
"signal/format_reward/group_std_mean": 0.03504836894571781,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009819878451526164,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009819878451526164,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003186128893867135,
"signal/frontier_aurc_reward/group_std_mean": 0.005603937339037657,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9783263966673986e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9783263966673986e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12809589505195618,
"signal/frontier_coverage_0/group_std_mean": 0.17424156665802001,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020014983601868153,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020014983601868153,
"signal/frontier_coverage_1/centered_abs_mean": 0.12809589505195618,
"signal/frontier_coverage_1/group_std_mean": 0.17424156665802001,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020014983601868153,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020014983601868153,
"signal/frontier_coverage_10/centered_abs_mean": 0.12744116485118867,
"signal/frontier_coverage_10/group_std_mean": 0.17343433499336242,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001991268200799823,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001991268200799823,
"signal/frontier_coverage_15/centered_abs_mean": 0.07363787293434143,
"signal/frontier_coverage_15/group_std_mean": 0.10297145694494247,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011505917645990849,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011505917645990849,
"signal/frontier_coverage_20/centered_abs_mean": 0.044591452926397324,
"signal/frontier_coverage_20/group_std_mean": 0.06145058870315552,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006967414519749582,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006967414519749582,
"signal/frontier_coverage_25/centered_abs_mean": 0.052361331135034564,
"signal/frontier_coverage_25/group_std_mean": 0.0672803521156311,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008181457989849151,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008181457989849151,
"signal/frontier_coverage_5/centered_abs_mean": 0.12809589505195618,
"signal/frontier_coverage_5/group_std_mean": 0.17424156665802001,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020014983601868153,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020014983601868153,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0031045635230839254,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004452465567737818,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0003880704403854907,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0003880704403854907,
"step": 195
},
{
"calibration/aurc": 0.18052983140791248,
"calibration/batch_distribution_entropy": 0.7398158763396785,
"calibration/batch_entropy_100bins": 0.8093075678083164,
"calibration/batch_entropy_10bins": 0.7398158763396785,
"calibration/batch_entropy_50bins": 0.8162096039603671,
"calibration/batch_uniqueness": 0.9336981234577946,
"calibration/buffer_distribution_entropy": 0.7927739625462163,
"calibration/buffer_entropy_100bins": 0.8589941803683226,
"calibration/buffer_entropy_10bins": 0.7927739625462163,
"calibration/buffer_entropy_50bins": 0.8634857187639527,
"calibration/confidence_entropy": 0.5153811073665431,
"calibration/coverage@0%": 0.0062856144931519396,
"calibration/coverage@1%": 0.0062856144931519396,
"calibration/coverage@10%": 0.1866731368237827,
"calibration/coverage@15%": 0.38207938252943063,
"calibration/coverage@20%": 0.838360745614035,
"calibration/coverage@25%": 0.9291008771929825,
"calibration/coverage@30%": 0.9573848684210526,
"calibration/coverage@5%": 0.0062856144931519396,
"calibration/distribution_entropy_10": 0.7398158763396785,
"calibration/distribution_entropy_100": 0.8093075678083164,
"calibration/ece": 0.109616134077216,
"calibration/mean_confidence": 0.6974732587107646,
"calibration/unique_confidence_per_question": 0.19479166666666664,
"calibration/unique_confidences": 74.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007899305555555559,
"completions/max_length": 3355.0,
"completions/max_terminated_length": 3355.0,
"completions/mean_length": 681.3518310546875,
"completions/mean_terminated_length": 686.8287475585937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.47999400007499904,
"grad_norm": 0.0004486938123591244,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0059,
"num_tokens": 428852207.0,
"reward": 1.070695161819458,
"reward_std": 0.11748828440904617,
"rewards/accuracy_reward": 0.7063368082046508,
"rewards/brier_reward": 0.8224664568901062,
"rewards/confidence_uniqueness_reward": 0.9281678915023803,
"rewards/format_reward": 0.9921006917953491,
"rewards/frontier_aurc_reward": -0.002603556914255023,
"rewards/frontier_coverage_0": 0.01670327754691243,
"rewards/frontier_coverage_1": 0.01670327754691243,
"rewards/frontier_coverage_10": 0.016874231677502394,
"rewards/frontier_coverage_15": 0.021660929918289183,
"rewards/frontier_coverage_20": 0.030010566860437394,
"rewards/frontier_coverage_25": 0.07551120072603226,
"rewards/frontier_coverage_5": 0.01670327754691243,
"rewards/true_frontier_ece_gap_only_reward": -0.0027688577305525542,
"signal/accuracy_reward/centered_abs_mean": 0.14654405266046525,
"signal/accuracy_reward/group_std_mean": 0.19502569139003753,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07327202633023262,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07327202633023262,
"signal/advantage_abs_mean": 0.08604095876216888,
"signal/advantage_pre_scale_abs_mean": 0.08604095876216888,
"signal/advantage_pre_scale_std": 0.15435749292373657,
"signal/advantage_std": 0.15435749292373657,
"signal/brier_reward/centered_abs_mean": 0.10942392647266388,
"signal/brier_reward/group_std_mean": 0.14457024335861207,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013677990809082986,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013677990809082986,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03794047012925148,
"signal/confidence_uniqueness_reward/group_std_mean": 0.055311404168605804,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004742558766156435,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004742558766156435,
"signal/format_reward/centered_abs_mean": 0.01360134556889534,
"signal/format_reward/group_std_mean": 0.025955809652805327,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00680067278444767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00680067278444767,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003289140481501818,
"signal/frontier_aurc_reward/group_std_mean": 0.005822925828397274,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1392820023465904e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1392820023465904e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1157380223274231,
"signal/frontier_coverage_0/group_std_mean": 0.16240898966789247,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001808406598865986,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001808406598865986,
"signal/frontier_coverage_1/centered_abs_mean": 0.1157380223274231,
"signal/frontier_coverage_1/group_std_mean": 0.16240898966789247,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001808406598865986,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001808406598865986,
"signal/frontier_coverage_10/centered_abs_mean": 0.11215617209672928,
"signal/frontier_coverage_10/group_std_mean": 0.15783893167972565,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001752440189011395,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001752440189011395,
"signal/frontier_coverage_15/centered_abs_mean": 0.06608396619558335,
"signal/frontier_coverage_15/group_std_mean": 0.09578151851892472,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010325619718059898,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010325619718059898,
"signal/frontier_coverage_20/centered_abs_mean": 0.04160864725708961,
"signal/frontier_coverage_20/group_std_mean": 0.058051402866840365,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006501351133920252,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006501351133920252,
"signal/frontier_coverage_25/centered_abs_mean": 0.05538794472813606,
"signal/frontier_coverage_25/group_std_mean": 0.07133743911981583,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008654366363771259,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008654366363771259,
"signal/frontier_coverage_5/centered_abs_mean": 0.1157380223274231,
"signal/frontier_coverage_5/group_std_mean": 0.16240898966789247,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001808406598865986,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001808406598865986,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0029893687460571527,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004549006605520845,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0003736710932571441,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0003736710932571441,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_completions/clipped_ratio": 0.008680555555555544,
"eval_completions/max_length": 2405.5,
"eval_completions/max_terminated_length": 2405.5,
"eval_completions/mean_length": 691.6689249674479,
"eval_completions/mean_terminated_length": 697.7305806477865,
"eval_completions/min_length": 56.833333333333336,
"eval_completions/min_terminated_length": 249.66666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 428852207.0,
"eval_reward": 1.0510073900222778,
"eval_reward_std": 0.2505972956617673,
"eval_rewards/accuracy_reward": 0.6796875,
"eval_rewards/brier_reward": 0.8202938636144003,
"eval_rewards/confidence_uniqueness_reward": 0.8755098978678385,
"eval_rewards/format_reward": 0.9913194378217062,
"eval_rewards/frontier_aurc_reward": -0.002243300104358544,
"eval_rewards/frontier_coverage_0": 0.029487861630817253,
"eval_rewards/frontier_coverage_1": 0.029487861630817253,
"eval_rewards/frontier_coverage_10": 0.02955207011351983,
"eval_rewards/frontier_coverage_15": 0.02661541321625312,
"eval_rewards/frontier_coverage_20": 0.03148760460317135,
"eval_rewards/frontier_coverage_25": 0.0767225877692302,
"eval_rewards/frontier_coverage_5": 0.029487861630817253,
"eval_rewards/true_frontier_ece_gap_only_reward": -0.0030973663087934256,
"eval_runtime": 184.977,
"eval_samples_per_second": 5.406,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4178059895833333,
"eval_signal/accuracy_reward/group_std_mean": 0.4625024398167928,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20890299479166666,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20890299479166666,
"eval_signal/advantage_abs_mean": 0.21793826669454575,
"eval_signal/advantage_pre_scale_abs_mean": 0.21793826669454575,
"eval_signal/advantage_pre_scale_std": 0.24974885831276575,
"eval_signal/advantage_std": 0.24974885831276575,
"eval_signal/brier_reward/centered_abs_mean": 0.18156319856643677,
"eval_signal/brier_reward/group_std_mean": 0.2326709752281507,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022695399820804596,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022695399820804596,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0633502888182799,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08953885920345783,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007918786102284988,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007918786102284988,
"eval_signal/format_reward/centered_abs_mean": 0.01671006918574373,
"eval_signal/format_reward/group_std_mean": 0.04611522859583298,
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008355034592871865,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.008355034592871865,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0037845204351469874,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.00772972172126174,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.913313179917168e-05,
"eval_signal/frontier_aurc_reward/weight": 0.015625,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.913313179917168e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.1768440529704094,
"eval_signal/frontier_coverage_0/group_std_mean": 0.275893231232961,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027631883276626468,
"eval_signal/frontier_coverage_0/weight": 0.015625,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027631883276626468,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1768440529704094,
"eval_signal/frontier_coverage_1/group_std_mean": 0.275893231232961,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027631883276626468,
"eval_signal/frontier_coverage_1/weight": 0.015625,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027631883276626468,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.17100265125433603,
"eval_signal/frontier_coverage_10/group_std_mean": 0.2681894302368164,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026719164258490005,
"eval_signal/frontier_coverage_10/weight": 0.015625,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026719164258490005,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.09313676009575526,
"eval_signal/frontier_coverage_15/group_std_mean": 0.16024632503588995,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001455261876496176,
"eval_signal/frontier_coverage_15/weight": 0.015625,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001455261876496176,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.05307722526292006,
"eval_signal/frontier_coverage_20/group_std_mean": 0.08430640151103337,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008293316447331259,
"eval_signal/frontier_coverage_20/weight": 0.015625,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008293316447331259,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09919273108243942,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11935225501656532,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001549886423163116,
"eval_signal/frontier_coverage_25/weight": 0.015625,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001549886423163116,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1768440529704094,
"eval_signal/frontier_coverage_5/group_std_mean": 0.275893231232961,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027631883276626468,
"eval_signal/frontier_coverage_5/weight": 0.015625,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027631883276626468,
"eval_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.00442255346570164,
"eval_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0076944112467269106,
"eval_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"eval_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.000552819183212705,
"eval_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"eval_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.000552819183212705,
"eval_steps_per_second": 0.032,
"step": 200
},
{
"epoch": 0.47999400007499904,
"step": 200,
"train_probe_completions/clipped_ratio": 0.012847222222222232,
"train_probe_completions/max_length": 2355.3333333333335,
"train_probe_completions/max_terminated_length": 2355.3333333333335,
"train_probe_completions/mean_length": 680.9791870117188,
"train_probe_completions/mean_terminated_length": 689.9359436035156,
"train_probe_completions/min_length": 0.0,
"train_probe_completions/min_terminated_length": 217.16666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 428852207.0,
"train_probe_reward": 1.067217191060384,
"train_probe_reward_std": 0.25106702248255414,
"train_probe_rewards/accuracy_reward": 0.7170139054457346,
"train_probe_rewards/brier_reward": 0.8235729734102885,
"train_probe_rewards/confidence_uniqueness_reward": 0.8701435724894205,
"train_probe_rewards/format_reward": 0.9895833333333334,
"train_probe_rewards/frontier_aurc_reward": -0.002555853434993575,
"train_probe_rewards/frontier_coverage_0": 0.00876838636274139,
"train_probe_rewards/frontier_coverage_1": 0.00876838636274139,
"train_probe_rewards/frontier_coverage_10": 0.009510708196709553,
"train_probe_rewards/frontier_coverage_15": 0.017479141689060878,
"train_probe_rewards/frontier_coverage_20": 0.02941159127900998,
"train_probe_rewards/frontier_coverage_25": 0.08159822722276051,
"train_probe_rewards/frontier_coverage_5": 0.00876838636274139,
"train_probe_rewards/true_frontier_ece_gap_only_reward": -0.002586768241599202,
"train_probe_runtime": 203.222,
"train_probe_samples_per_second": 4.921,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3974609375,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4516189793745677,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19873046875,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19873046875,
"train_probe_signal/advantage_abs_mean": 0.21062870572010675,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.21062870572010675,
"train_probe_signal/advantage_pre_scale_std": 0.24998972316582999,
"train_probe_signal/advantage_std": 0.24998972316582999,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1768066460887591,
"train_probe_signal/brier_reward/group_std_mean": 0.2324258784453074,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022100830761094887,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.022100830761094887,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06669201205174129,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1005245956281821,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00833650150646766,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00833650150646766,
"train_probe_signal/format_reward/centered_abs_mean": 0.02007378451526165,
"train_probe_signal/format_reward/group_std_mean": 0.05593615584075451,
"train_probe_signal/format_reward/group_zero_std_frac": 0.6944444676240286,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010036892257630825,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010036892257630825,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.004501550691202283,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.010145407247667512,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.033672955003567e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.015625,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.033672955003567e-05,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.1579719434181849,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.25707169622182846,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002468311615909139,
"train_probe_signal/frontier_coverage_0/weight": 0.015625,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002468311615909139,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.1579719434181849,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.25707169622182846,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002468311615909139,
"train_probe_signal/frontier_coverage_1/weight": 0.015625,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002468311615909139,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.15405935049057007,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.25156734387079877,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024071773514151573,
"train_probe_signal/frontier_coverage_10/weight": 0.015625,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024071773514151573,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.08175658682982127,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.14768946915864944,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012774466692159574,
"train_probe_signal/frontier_coverage_15/weight": 0.015625,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012774466692159574,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.04819720300535361,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.07652890309691429,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007530812969586501,
"train_probe_signal/frontier_coverage_20/weight": 0.015625,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007530812969586501,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.09637204806009929,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.11713628967603047,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015058132509390514,
"train_probe_signal/frontier_coverage_25/weight": 0.015625,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015058132509390514,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.1579719434181849,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.25707169622182846,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002468311615909139,
"train_probe_signal/frontier_coverage_5/weight": 0.015625,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002468311615909139,
"train_probe_signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0036215446889400482,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.006501481092224519,
"train_probe_signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"train_probe_signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00045269308611750603,
"train_probe_signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"train_probe_signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00045269308611750603,
"train_probe_steps_per_second": 0.03
},
{
"calibration/aurc": 0.16907717783248571,
"calibration/batch_distribution_entropy": 0.7094331746984237,
"calibration/batch_entropy_100bins": 0.7936049195126413,
"calibration/batch_entropy_10bins": 0.7094331746984237,
"calibration/batch_entropy_50bins": 0.7939230371346102,
"calibration/batch_uniqueness": 0.924174709187571,
"calibration/buffer_distribution_entropy": 0.7921673058072191,
"calibration/buffer_entropy_100bins": 0.8584895422787383,
"calibration/buffer_entropy_10bins": 0.7921673058072191,
"calibration/buffer_entropy_50bins": 0.8628322959175796,
"calibration/confidence_entropy": 0.49239157203877826,
"calibration/coverage@0%": 0.010430265448215839,
"calibration/coverage@1%": 0.010430265448215839,
"calibration/coverage@10%": 0.11064104656222802,
"calibration/coverage@15%": 0.5501699847693647,
"calibration/coverage@20%": 0.8597367275892079,
"calibration/coverage@25%": 0.8936221714534378,
"calibration/coverage@30%": 0.9400198542210617,
"calibration/coverage@5%": 0.010430265448215839,
"calibration/distribution_entropy_10": 0.7094331746984237,
"calibration/distribution_entropy_100": 0.7936049195126413,
"calibration/ece": 0.10543889063662855,
"calibration/mean_confidence": 0.6970287687410912,
"calibration/unique_confidence_per_question": 0.19322916666666667,
"calibration/unique_confidences": 74.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111116,
"completions/max_length": 2892.6,
"completions/max_terminated_length": 2892.6,
"completions/mean_length": 690.477685546875,
"completions/mean_terminated_length": 696.5508056640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.6,
"epoch": 0.491993850076874,
"grad_norm": 0.0003582312201615423,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0072,
"num_tokens": 439872462.0,
"reward": 1.092501163482666,
"reward_std": 0.11384595781564713,
"rewards/accuracy_reward": 0.7523437380790711,
"rewards/brier_reward": 0.828589677810669,
"rewards/confidence_uniqueness_reward": 0.9259551286697387,
"rewards/format_reward": 0.9912326335906982,
"rewards/frontier_aurc_reward": -0.00198111105710268,
"rewards/frontier_coverage_0": -0.00452432045713067,
"rewards/frontier_coverage_1": -0.00452432045713067,
"rewards/frontier_coverage_10": -0.003602027613669634,
"rewards/frontier_coverage_15": 0.012093347311019898,
"rewards/frontier_coverage_20": 0.03151162005960941,
"rewards/frontier_coverage_25": 0.09083455055952072,
"rewards/frontier_coverage_5": -0.00452432045713067,
"rewards/true_frontier_ece_gap_only_reward": -0.0032516193110495805,
"signal/accuracy_reward/centered_abs_mean": 0.14517686665058135,
"signal/accuracy_reward/group_std_mean": 0.19360876083374023,
"signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07258843332529068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07258843332529068,
"signal/advantage_abs_mean": 0.0818178191781044,
"signal/advantage_pre_scale_abs_mean": 0.0818178191781044,
"signal/advantage_pre_scale_std": 0.1523301661014557,
"signal/advantage_std": 0.1523301661014557,
"signal/brier_reward/centered_abs_mean": 0.1120417907834053,
"signal/brier_reward/group_std_mean": 0.14704422652721405,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014005223847925663,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014005223847925663,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03881465494632721,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05692438259720802,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004851831868290901,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004851831868290901,
"signal/format_reward/centered_abs_mean": 0.014360894355922938,
"signal/format_reward/group_std_mean": 0.027210034802556037,
"signal/format_reward/group_zero_std_frac": 0.8888888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007180447177961469,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007180447177961469,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026562759652733804,
"signal/frontier_aurc_reward/group_std_mean": 0.005001515662297606,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.150431195739657e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.150431195739657e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12333370298147202,
"signal/frontier_coverage_0/group_std_mean": 0.16834968626499175,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019270891090855003,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019270891090855003,
"signal/frontier_coverage_1/centered_abs_mean": 0.12333370298147202,
"signal/frontier_coverage_1/group_std_mean": 0.16834968626499175,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019270891090855003,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019270891090855003,
"signal/frontier_coverage_10/centered_abs_mean": 0.11967587620019912,
"signal/frontier_coverage_10/group_std_mean": 0.1636903315782547,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018699355656281113,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018699355656281113,
"signal/frontier_coverage_15/centered_abs_mean": 0.06968192905187606,
"signal/frontier_coverage_15/group_std_mean": 0.09741113483905792,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010887801414355635,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010887801414355635,
"signal/frontier_coverage_20/centered_abs_mean": 0.04491528794169426,
"signal/frontier_coverage_20/group_std_mean": 0.060000843554735186,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007018013740889729,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007018013740889729,
"signal/frontier_coverage_25/centered_abs_mean": 0.057969672977924346,
"signal/frontier_coverage_25/group_std_mean": 0.0744464322924614,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009057761402800679,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009057761402800679,
"signal/frontier_coverage_5/centered_abs_mean": 0.12333370298147202,
"signal/frontier_coverage_5/group_std_mean": 0.16834968626499175,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019270891090855003,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019270891090855003,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0032607629895210267,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.004654625337570906,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.00040759537369012834,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.00040759537369012834,
"step": 205
},
{
"calibration/aurc": 0.1370533125922073,
"calibration/batch_distribution_entropy": 0.7072075783416373,
"calibration/batch_entropy_100bins": 0.7965424913173278,
"calibration/batch_entropy_10bins": 0.7072075783416373,
"calibration/batch_entropy_50bins": 0.7991626918689265,
"calibration/batch_uniqueness": 0.9300988239372975,
"calibration/buffer_distribution_entropy": 0.7932410370024624,
"calibration/buffer_entropy_100bins": 0.8591231164142057,
"calibration/buffer_entropy_10bins": 0.7932410370024624,
"calibration/buffer_entropy_50bins": 0.8632449929488756,
"calibration/confidence_entropy": 0.5134386595908284,
"calibration/coverage@0%": 0.007853439020572171,
"calibration/coverage@1%": 0.007853439020572171,
"calibration/coverage@10%": 0.21872482437993335,
"calibration/coverage@15%": 0.7426824379239719,
"calibration/coverage@20%": 0.8840740183815656,
"calibration/coverage@25%": 0.9695113893711355,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.04005535372901429,
"calibration/distribution_entropy_10": 0.7072075783416373,
"calibration/distribution_entropy_100": 0.7965424913173278,
"calibration/ece": 0.07591815575628273,
"calibration/mean_confidence": 0.7097033007115973,
"calibration/unique_confidence_per_question": 0.18229166666666666,
"calibration/unique_confidences": 70.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005353009259259227,
"completions/max_length": 3599.0,
"completions/max_terminated_length": 3599.0,
"completions/mean_length": 694.3878784179688,
"completions/mean_terminated_length": 698.0981852213541,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.66666666666666,
"epoch": 0.49919376007799904,
"num_tokens": 446538119.0,
"reward": 1.0708950360616047,
"reward_std": 0.11581993599732716,
"rewards/accuracy_reward": 0.7042824029922485,
"rewards/brier_reward": 0.8196952740351359,
"rewards/confidence_uniqueness_reward": 0.9326375126838684,
"rewards/format_reward": 0.9945023059844971,
"rewards/frontier_aurc_reward": -0.002047328627668321,
"rewards/frontier_coverage_0": 0.012841465882956982,
"rewards/frontier_coverage_1": 0.012841465882956982,
"rewards/frontier_coverage_10": 0.012699058279395103,
"rewards/frontier_coverage_15": 0.017329357874890167,
"rewards/frontier_coverage_20": 0.0331996213644743,
"rewards/frontier_coverage_25": 0.0871302808324496,
"rewards/frontier_coverage_5": 0.012841465882956982,
"rewards/true_frontier_ece_gap_only_reward": -0.00366590932632486,
"signal/accuracy_reward/centered_abs_mean": 0.15892650187015533,
"signal/accuracy_reward/group_std_mean": 0.20545404652754465,
"signal/accuracy_reward/group_zero_std_frac": 0.43981483578681946,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07946325093507767,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07946325093507767,
"signal/advantage_abs_mean": 0.08434396361311276,
"signal/advantage_pre_scale_abs_mean": 0.08434396361311276,
"signal/advantage_pre_scale_std": 0.15035154422124228,
"signal/advantage_std": 0.15035154422124228,
"signal/brier_reward/centered_abs_mean": 0.11334347476561864,
"signal/brier_reward/group_std_mean": 0.14777959883213043,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01416793434570233,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01416793434570233,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03333321896692117,
"signal/confidence_uniqueness_reward/group_std_mean": 0.051341903706391655,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004166652370865147,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004166652370865147,
"signal/format_reward/centered_abs_mean": 0.010308159670482079,
"signal/format_reward/group_std_mean": 0.02368570367495219,
"signal/format_reward/group_zero_std_frac": 0.8888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005154079835241039,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005154079835241039,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027586812308679023,
"signal/frontier_aurc_reward/group_std_mean": 0.0049862076217929525,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.310439423231097e-05,
"signal/frontier_aurc_reward/weight": 0.015625,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.310439423231097e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1345667690038681,
"signal/frontier_coverage_0/group_std_mean": 0.18024377524852753,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002102605765685439,
"signal/frontier_coverage_0/weight": 0.015625,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002102605765685439,
"signal/frontier_coverage_1/centered_abs_mean": 0.1345667690038681,
"signal/frontier_coverage_1/group_std_mean": 0.18024377524852753,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002102605765685439,
"signal/frontier_coverage_1/weight": 0.015625,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002102605765685439,
"signal/frontier_coverage_10/centered_abs_mean": 0.12825309236844382,
"signal/frontier_coverage_10/group_std_mean": 0.17192438741525015,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020039545682569346,
"signal/frontier_coverage_10/weight": 0.015625,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020039545682569346,
"signal/frontier_coverage_15/centered_abs_mean": 0.07049262523651123,
"signal/frontier_coverage_15/group_std_mean": 0.09754702945550282,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001101447269320488,
"signal/frontier_coverage_15/weight": 0.015625,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001101447269320488,
"signal/frontier_coverage_20/centered_abs_mean": 0.04566365604599317,
"signal/frontier_coverage_20/group_std_mean": 0.06064340099692345,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007134946257186433,
"signal/frontier_coverage_20/weight": 0.015625,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007134946257186433,
"signal/frontier_coverage_25/centered_abs_mean": 0.05877576395869255,
"signal/frontier_coverage_25/group_std_mean": 0.07547732442617416,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009183713118545711,
"signal/frontier_coverage_25/weight": 0.015625,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009183713118545711,
"signal/frontier_coverage_5/centered_abs_mean": 0.1345667690038681,
"signal/frontier_coverage_5/group_std_mean": 0.18024377524852753,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002102605765685439,
"signal/frontier_coverage_5/weight": 0.015625,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002102605765685439,
"signal/true_frontier_ece_gap_only_reward/centered_abs_mean": 0.0035362955338011184,
"signal/true_frontier_ece_gap_only_reward/group_std_mean": 0.0049490658566355705,
"signal/true_frontier_ece_gap_only_reward/group_zero_std_frac": 0.0,
"signal/true_frontier_ece_gap_only_reward/scaled_weighted_centered_abs_mean": 0.0004420369417251398,
"signal/true_frontier_ece_gap_only_reward/weight": 0.125,
"signal/true_frontier_ece_gap_only_reward/weighted_centered_abs_mean": 0.0004420369417251398,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.00872318486038309,
"train_runtime": 40755.4175,
"train_samples_per_second": 0.368,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 446538119,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}