Files
RLCR-v4-ks-uniqueness-hotpo…/trainer_state.json

8474 lines
471 KiB
JSON
Raw Permalink Normal View History

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.5681354920214096,
"calibration/batch_distribution_entropy": 0.6450350928927815,
"calibration/confidence_entropy": 0.346740957452881,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.43446125614086684,
"calibration/mean_confidence": 0.7908406375397601,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0361328125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1503.4,
"completions/mean_length": 271.28115234375,
"completions/mean_terminated_length": 223.8628723144531,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.059467002749443054,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0934,
"num_tokens": 17621951.0,
"reward": 0.6730658292770386,
"reward_std": 0.5045446038246155,
"rewards/accuracy_reward": 0.27001953125,
"rewards/brier_reward": 0.4092401027679443,
"rewards/confidence_uniqueness_reward": 0.48412379026412966,
"rewards/format_reward": 0.68173828125,
"rewards/frontier_aurc_reward": 0.3416558563709259,
"rewards/frontier_coverage_1": 0.3416558563709259,
"rewards/frontier_coverage_10": 0.3416558563709259,
"rewards/frontier_coverage_15": 0.3416558563709259,
"rewards/frontier_coverage_20": 0.3416558563709259,
"rewards/frontier_coverage_25": 0.3416558563709259,
"rewards/frontier_coverage_5": 0.3416558563709259,
"rewards/frontier_ece_reward": 0.3416558563709259,
"signal/accuracy_reward/centered_abs_mean": 0.279132080078125,
"signal/accuracy_reward/group_std_mean": 0.31931535005569456,
"signal/accuracy_reward/group_zero_std_frac": 0.25625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1395660400390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1395660400390625,
"signal/advantage_abs_mean": 0.43447349071502683,
"signal/advantage_pre_scale_abs_mean": 0.43447349071502683,
"signal/advantage_pre_scale_std": 0.5123933017253876,
"signal/advantage_std": 0.5123933017253876,
"signal/brier_reward/centered_abs_mean": 0.33928354978561404,
"signal/brier_reward/group_std_mean": 0.38253386616706847,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.042410443723201754,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.042410443723201754,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2978093445301056,
"signal/confidence_uniqueness_reward/group_std_mean": 0.34845100045204164,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0372261680662632,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0372261680662632,
"signal/format_reward/centered_abs_mean": 0.404998779296875,
"signal/format_reward/group_std_mean": 0.4546263098716736,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2024993896484375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.2024993896484375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.31834944486618044,
"signal/frontier_aurc_reward/group_std_mean": 0.36653432846069334,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_1/centered_abs_mean": 0.31834944486618044,
"signal/frontier_coverage_1/group_std_mean": 0.36653432846069334,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_10/centered_abs_mean": 0.31834944486618044,
"signal/frontier_coverage_10/group_std_mean": 0.36653432846069334,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_15/centered_abs_mean": 0.31834944486618044,
"signal/frontier_coverage_15/group_std_mean": 0.36653432846069334,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_20/centered_abs_mean": 0.31834944486618044,
"signal/frontier_coverage_20/group_std_mean": 0.36653432846069334,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_25/centered_abs_mean": 0.31834944486618044,
"signal/frontier_coverage_25/group_std_mean": 0.36653432846069334,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_5/centered_abs_mean": 0.31834944486618044,
"signal/frontier_coverage_5/group_std_mean": 0.36653432846069334,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00569845512509346,
"signal/frontier_ece_reward/centered_abs_mean": 0.31834944486618044,
"signal/frontier_ece_reward/group_std_mean": 0.36653432846069334,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.039793680608272555,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.039793680608272555,
"step": 5
},
{
"calibration/aurc": 0.5823592206951076,
"calibration/batch_distribution_entropy": 0.6377635262826689,
"calibration/confidence_entropy": 0.34316212043587685,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4711939179056281,
"calibration/mean_confidence": 0.8033346823525754,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03837890625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1489.4,
"completions/mean_length": 264.73369140625,
"completions/mean_terminated_length": 214.02195739746094,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.03480805084109306,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0952,
"num_tokens": 35433176.0,
"reward": 0.6785492658615112,
"reward_std": 0.4799711525440216,
"rewards/accuracy_reward": 0.25234375,
"rewards/brier_reward": 0.4085344135761261,
"rewards/confidence_uniqueness_reward": 0.506452476978302,
"rewards/format_reward": 0.70908203125,
"rewards/frontier_aurc_reward": 0.3334519624710083,
"rewards/frontier_coverage_1": 0.3334519624710083,
"rewards/frontier_coverage_10": 0.3334519624710083,
"rewards/frontier_coverage_15": 0.3334519624710083,
"rewards/frontier_coverage_20": 0.3334519624710083,
"rewards/frontier_coverage_25": 0.3334519624710083,
"rewards/frontier_coverage_5": 0.3334519624710083,
"rewards/frontier_ece_reward": 0.3334519624710083,
"signal/accuracy_reward/centered_abs_mean": 0.2619384765625,
"signal/accuracy_reward/group_std_mean": 0.30939258337020875,
"signal/accuracy_reward/group_zero_std_frac": 0.246875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13096923828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13096923828125,
"signal/advantage_abs_mean": 0.4020266532897949,
"signal/advantage_pre_scale_abs_mean": 0.4020266532897949,
"signal/advantage_pre_scale_std": 0.48843042850494384,
"signal/advantage_std": 0.48843042850494384,
"signal/brier_reward/centered_abs_mean": 0.3226713418960571,
"signal/brier_reward/group_std_mean": 0.3693849265575409,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04033391773700714,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04033391773700714,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2817148804664612,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3387665629386902,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521436005830765,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03521436005830765,
"signal/format_reward/centered_abs_mean": 0.378582763671875,
"signal/format_reward/group_std_mean": 0.43834707140922546,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1892913818359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1892913818359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.30297967195510866,
"signal/frontier_aurc_reward/group_std_mean": 0.3533449411392212,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_1/centered_abs_mean": 0.30297967195510866,
"signal/frontier_coverage_1/group_std_mean": 0.3533449411392212,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_10/centered_abs_mean": 0.30297967195510866,
"signal/frontier_coverage_10/group_std_mean": 0.3533449411392212,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_15/centered_abs_mean": 0.30297967195510866,
"signal/frontier_coverage_15/group_std_mean": 0.3533449411392212,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_20/centered_abs_mean": 0.30297967195510866,
"signal/frontier_coverage_20/group_std_mean": 0.3533449411392212,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_25/centered_abs_mean": 0.30297967195510866,
"signal/frontier_coverage_25/group_std_mean": 0.3533449411392212,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_5/centered_abs_mean": 0.30297967195510866,
"signal/frontier_coverage_5/group_std_mean": 0.3533449411392212,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005423336289823055,
"signal/frontier_ece_reward/centered_abs_mean": 0.30297967195510866,
"signal/frontier_ece_reward/group_std_mean": 0.3533449411392212,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03787245899438858,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03787245899438858,
"step": 10
},
{
"calibration/aurc": 0.485502347974659,
"calibration/batch_distribution_entropy": 0.6338946369559008,
"calibration/buffer_distribution_entropy": 0.6565034331851883,
"calibration/confidence_entropy": 0.3398889343388115,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.38130162024695624,
"calibration/mean_confidence": 0.8044045301334914,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02001953125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1488.4,
"completions/mean_length": 213.66611328125,
"completions/mean_terminated_length": 186.77418518066406,
"completions/min_length": 5.2,
"completions/min_terminated_length": 5.2,
"epoch": 0.048,
"grad_norm": 0.09747687727212906,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0577,
"num_tokens": 52669853.0,
"reward": 0.8086728811264038,
"reward_std": 0.3855405569076538,
"rewards/accuracy_reward": 0.32490234375,
"rewards/brier_reward": 0.505929458141327,
"rewards/confidence_uniqueness_reward": 0.6144041776657104,
"rewards/format_reward": 0.84814453125,
"rewards/frontier_aurc_reward": 0.3234916229732335,
"rewards/frontier_coverage_1": 0.33721864223480225,
"rewards/frontier_coverage_10": 0.33721864223480225,
"rewards/frontier_coverage_15": 0.33721864223480225,
"rewards/frontier_coverage_20": 0.33721864223480225,
"rewards/frontier_coverage_25": 0.33721864223480225,
"rewards/frontier_coverage_5": 0.33721864223480225,
"rewards/frontier_ece_reward": 0.32079982459545137,
"signal/accuracy_reward/centered_abs_mean": 0.239251708984375,
"signal/accuracy_reward/group_std_mean": 0.2890691041946411,
"signal/accuracy_reward/group_zero_std_frac": 0.28125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1196258544921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1196258544921875,
"signal/advantage_abs_mean": 0.3007605969905853,
"signal/advantage_pre_scale_abs_mean": 0.3007605969905853,
"signal/advantage_pre_scale_std": 0.3971730887889862,
"signal/advantage_std": 0.3971730887889862,
"signal/brier_reward/centered_abs_mean": 0.28599911630153657,
"signal/brier_reward/group_std_mean": 0.34063884019851687,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03574988953769207,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03574988953769207,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21475785672664643,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2769153594970703,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026844732090830804,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.026844732090830804,
"signal/format_reward/centered_abs_mean": 0.224554443359375,
"signal/format_reward/group_std_mean": 0.31889126896858216,
"signal/format_reward/group_zero_std_frac": 0.065625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1122772216796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1122772216796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.23223379356786608,
"signal/frontier_aurc_reward/group_std_mean": 0.27599835190922023,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004156984848668799,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004156984848668799,
"signal/frontier_coverage_1/centered_abs_mean": 0.25027269423007964,
"signal/frontier_coverage_1/group_std_mean": 0.3035570979118347,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_10/centered_abs_mean": 0.25027269423007964,
"signal/frontier_coverage_10/group_std_mean": 0.3035570979118347,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_15/centered_abs_mean": 0.25027269423007964,
"signal/frontier_coverage_15/group_std_mean": 0.3035570979118347,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_20/centered_abs_mean": 0.25027269423007964,
"signal/frontier_coverage_20/group_std_mean": 0.3035570979118347,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_25/centered_abs_mean": 0.25027269423007964,
"signal/frontier_coverage_25/group_std_mean": 0.3035570979118347,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_5/centered_abs_mean": 0.25027269423007964,
"signal/frontier_coverage_5/group_std_mean": 0.3035570979118347,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004479881143197417,
"signal/frontier_ece_reward/centered_abs_mean": 0.25118278712034225,
"signal/frontier_ece_reward/group_std_mean": 0.2993951976299286,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03139784839004278,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03139784839004278,
"step": 15
},
{
"calibration/aurc": 0.4613156372239649,
"calibration/batch_distribution_entropy": 0.6885188701822018,
"calibration/buffer_distribution_entropy": 0.6525163906375744,
"calibration/confidence_entropy": 0.3612115527122598,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.33557730079398584,
"calibration/mean_confidence": 0.7816876164808539,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01103515625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1425.8,
"completions/mean_length": 162.32021484375,
"completions/mean_terminated_length": 146.99488830566406,
"completions/min_length": 1.8,
"completions/min_terminated_length": 1.8,
"epoch": 0.064,
"grad_norm": 0.028669551014900208,
"learning_rate": 1e-06,
"loss": 0.0214,
"num_tokens": 69250412.0,
"reward": 0.8064048051834106,
"reward_std": 0.26577826142311095,
"rewards/accuracy_reward": 0.37060546875,
"rewards/brier_reward": 0.5703646183013916,
"rewards/confidence_uniqueness_reward": 0.6865696787834168,
"rewards/format_reward": 0.92236328125,
"rewards/frontier_aurc_reward": -0.006006188318133354,
"rewards/frontier_coverage_1": 0.05366070494055748,
"rewards/frontier_coverage_10": 0.05366070494055748,
"rewards/frontier_coverage_15": 0.05366070494055748,
"rewards/frontier_coverage_20": 0.05366070494055748,
"rewards/frontier_coverage_25": 0.05366070494055748,
"rewards/frontier_coverage_5": 0.05366070494055748,
"rewards/frontier_ece_reward": -0.022815992310643195,
"signal/accuracy_reward/centered_abs_mean": 0.232647705078125,
"signal/accuracy_reward/group_std_mean": 0.2829224646091461,
"signal/accuracy_reward/group_zero_std_frac": 0.290625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1163238525390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1163238525390625,
"signal/advantage_abs_mean": 0.20600511133670807,
"signal/advantage_pre_scale_abs_mean": 0.20600511133670807,
"signal/advantage_pre_scale_std": 0.2877360999584198,
"signal/advantage_std": 0.2877360999584198,
"signal/brier_reward/centered_abs_mean": 0.2589739263057709,
"signal/brier_reward/group_std_mean": 0.31490403413772583,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03237174078822136,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03237174078822136,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.16161151528358458,
"signal/confidence_uniqueness_reward/group_std_mean": 0.20550169944763183,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020201439410448073,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020201439410448073,
"signal/format_reward/centered_abs_mean": 0.111956787109375,
"signal/format_reward/group_std_mean": 0.17785735428333282,
"signal/format_reward/group_zero_std_frac": 0.3625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0559783935546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0559783935546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005545902531594038,
"signal/frontier_aurc_reward/group_std_mean": 0.0080027237534523,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.927165228873491e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.927165228873491e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.09711904674768448,
"signal/frontier_coverage_1/group_std_mean": 0.15192094445228577,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_10/centered_abs_mean": 0.09711904674768448,
"signal/frontier_coverage_10/group_std_mean": 0.15192094445228577,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_15/centered_abs_mean": 0.09711904674768448,
"signal/frontier_coverage_15/group_std_mean": 0.15192094445228577,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_20/centered_abs_mean": 0.09711904674768448,
"signal/frontier_coverage_20/group_std_mean": 0.15192094445228577,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_25/centered_abs_mean": 0.09711904674768448,
"signal/frontier_coverage_25/group_std_mean": 0.15192094445228577,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_5/centered_abs_mean": 0.09711904674768448,
"signal/frontier_coverage_5/group_std_mean": 0.15192094445228577,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017384308390319347,
"signal/frontier_ece_reward/centered_abs_mean": 0.1027738630771637,
"signal/frontier_ece_reward/group_std_mean": 0.12494452595710755,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012846732884645462,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012846732884645462,
"step": 20
},
{
"calibration/aurc": 0.5838333717990067,
"calibration/batch_distribution_entropy": 0.7631321019020263,
"calibration/buffer_distribution_entropy": 0.6752091827121237,
"calibration/confidence_entropy": 0.427675346192575,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3880188298739684,
"calibration/mean_confidence": 0.743169744444465,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0072265625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1216.2,
"completions/mean_length": 133.6181640625,
"completions/mean_terminated_length": 123.41706695556641,
"completions/min_length": 1.6,
"completions/min_terminated_length": 1.6,
"epoch": 0.08,
"grad_norm": 0.10266012698411942,
"learning_rate": 1e-06,
"loss": 0.0038,
"num_tokens": 85551814.0,
"reward": 0.86251140832901,
"reward_std": 0.2213844656944275,
"rewards/accuracy_reward": 0.41123046875,
"rewards/brier_reward": 0.6216847538948059,
"rewards/confidence_uniqueness_reward": 0.7656373262405396,
"rewards/format_reward": 0.96044921875,
"rewards/frontier_aurc_reward": -0.005164883844554424,
"rewards/frontier_coverage_1": 0.04586975798010826,
"rewards/frontier_coverage_10": 0.04586975798010826,
"rewards/frontier_coverage_15": 0.04586975798010826,
"rewards/frontier_coverage_20": 0.04586975798010826,
"rewards/frontier_coverage_25": 0.04586975798010826,
"rewards/frontier_coverage_5": 0.04586975798010826,
"rewards/frontier_ece_reward": -0.01262117656879127,
"signal/accuracy_reward/centered_abs_mean": 0.220159912109375,
"signal/accuracy_reward/group_std_mean": 0.27431103587150574,
"signal/accuracy_reward/group_zero_std_frac": 0.284375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1100799560546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1100799560546875,
"signal/advantage_abs_mean": 0.1713700234889984,
"signal/advantage_pre_scale_abs_mean": 0.1713700234889984,
"signal/advantage_pre_scale_std": 0.2442230075597763,
"signal/advantage_std": 0.2442230075597763,
"signal/brier_reward/centered_abs_mean": 0.23227280676364898,
"signal/brier_reward/group_std_mean": 0.28580942153930666,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029034100845456122,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.029034100845456122,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11165157109498977,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14749074429273606,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013956446386873721,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013956446386873721,
"signal/format_reward/centered_abs_mean": 0.058660888671875,
"signal/format_reward/group_std_mean": 0.0945195160806179,
"signal/format_reward/group_zero_std_frac": 0.65,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0293304443359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0293304443359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004352754168212414,
"signal/frontier_aurc_reward/group_std_mean": 0.006485749594867229,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.79142945248168e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.79142945248168e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11262711882591248,
"signal/frontier_coverage_1/group_std_mean": 0.171766459941864,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_10/centered_abs_mean": 0.11262711882591248,
"signal/frontier_coverage_10/group_std_mean": 0.171766459941864,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_15/centered_abs_mean": 0.11262711882591248,
"signal/frontier_coverage_15/group_std_mean": 0.171766459941864,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_20/centered_abs_mean": 0.11262711882591248,
"signal/frontier_coverage_20/group_std_mean": 0.171766459941864,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_25/centered_abs_mean": 0.11262711882591248,
"signal/frontier_coverage_25/group_std_mean": 0.171766459941864,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_5/centered_abs_mean": 0.11262711882591248,
"signal/frontier_coverage_5/group_std_mean": 0.171766459941864,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020160253159701826,
"signal/frontier_ece_reward/centered_abs_mean": 0.09184739738702774,
"signal/frontier_ece_reward/group_std_mean": 0.11263370960950851,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011480924673378468,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011480924673378468,
"step": 25
},
{
"calibration/aurc": 0.5599529783433583,
"calibration/batch_distribution_entropy": 0.8180527560081053,
"calibration/buffer_distribution_entropy": 0.7099110970064041,
"calibration/confidence_entropy": 0.5050432578890813,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.2930707046300085,
"calibration/mean_confidence": 0.6695370928113038,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0107421875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1365.6,
"completions/mean_length": 139.62607421875,
"completions/mean_terminated_length": 124.46354522705079,
"completions/min_length": 1.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.096,
"grad_norm": 0.3033556342124939,
"learning_rate": 1e-06,
"loss": 0.0144,
"num_tokens": 102026193.0,
"reward": 0.8633492946624756,
"reward_std": 0.22220987677574158,
"rewards/accuracy_reward": 0.414453125,
"rewards/brier_reward": 0.6465227484703064,
"rewards/confidence_uniqueness_reward": 0.7718554854393005,
"rewards/format_reward": 0.9494140625,
"rewards/frontier_aurc_reward": -0.004374950844794512,
"rewards/frontier_coverage_1": 0.04679640345275402,
"rewards/frontier_coverage_10": 0.04679640345275402,
"rewards/frontier_coverage_15": 0.04679640345275402,
"rewards/frontier_coverage_20": 0.04679640345275402,
"rewards/frontier_coverage_25": 0.04679640345275402,
"rewards/frontier_coverage_5": 0.04679640345275402,
"rewards/frontier_ece_reward": -0.006633454142138362,
"signal/accuracy_reward/centered_abs_mean": 0.2164794921875,
"signal/accuracy_reward/group_std_mean": 0.2656884342432022,
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10823974609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10823974609375,
"signal/advantage_abs_mean": 0.1734051823616028,
"signal/advantage_pre_scale_abs_mean": 0.1734051823616028,
"signal/advantage_pre_scale_std": 0.25319576263427734,
"signal/advantage_std": 0.25319576263427734,
"signal/brier_reward/centered_abs_mean": 0.22392457127571105,
"signal/brier_reward/group_std_mean": 0.27409825921058656,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027990571409463882,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.027990571409463882,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1212245300412178,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1597886711359024,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015153066255152225,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015153066255152225,
"signal/format_reward/centered_abs_mean": 0.076416015625,
"signal/format_reward/group_std_mean": 0.11220613121986389,
"signal/format_reward/group_zero_std_frac": 0.6375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0382080078125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0382080078125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030647643376141786,
"signal/frontier_aurc_reward/group_std_mean": 0.004675971809774637,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4859279043739664e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4859279043739664e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13435963690280914,
"signal/frontier_coverage_1/group_std_mean": 0.19624820053577424,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_10/centered_abs_mean": 0.13435963690280914,
"signal/frontier_coverage_10/group_std_mean": 0.19624820053577424,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_15/centered_abs_mean": 0.13435963690280914,
"signal/frontier_coverage_15/group_std_mean": 0.19624820053577424,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_20/centered_abs_mean": 0.13435963690280914,
"signal/frontier_coverage_20/group_std_mean": 0.19624820053577424,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_25/centered_abs_mean": 0.13435963690280914,
"signal/frontier_coverage_25/group_std_mean": 0.19624820053577424,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_5/centered_abs_mean": 0.13435963690280914,
"signal/frontier_coverage_5/group_std_mean": 0.19624820053577424,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024050374049693346,
"signal/frontier_ece_reward/centered_abs_mean": 0.08245499283075333,
"signal/frontier_ece_reward/group_std_mean": 0.10052948445081711,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010306874103844166,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010306874103844166,
"step": 30
},
{
"calibration/aurc": 0.43093254462035047,
"calibration/batch_distribution_entropy": 0.8765791511807105,
"calibration/buffer_distribution_entropy": 0.7493890212367624,
"calibration/confidence_entropy": 0.5332302085205971,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.12442684122316035,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.18847028436719626,
"calibration/mean_confidence": 0.6052631008223052,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009765625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 974.0,
"completions/mean_length": 140.3001953125,
"completions/mean_terminated_length": 126.54303894042968,
"completions/min_length": 1.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.112,
"grad_norm": 0.0450531542301178,
"learning_rate": 1e-06,
"loss": 0.0177,
"num_tokens": 118572339.0,
"reward": 0.9136639595031738,
"reward_std": 0.17941873669624328,
"rewards/accuracy_reward": 0.4568359375,
"rewards/brier_reward": 0.7062686562538147,
"rewards/confidence_uniqueness_reward": 0.8257040023803711,
"rewards/format_reward": 0.97333984375,
"rewards/frontier_aurc_reward": -0.0037020944990217687,
"rewards/frontier_coverage_1": 0.05813024044036865,
"rewards/frontier_coverage_10": 0.05813024044036865,
"rewards/frontier_coverage_15": 0.05813024044036865,
"rewards/frontier_coverage_20": 0.05813024044036865,
"rewards/frontier_coverage_25": 0.05813024044036865,
"rewards/frontier_coverage_5": 0.05813024044036865,
"rewards/frontier_ece_reward": 0.007220498844981193,
"signal/accuracy_reward/centered_abs_mean": 0.19686279296875,
"signal/accuracy_reward/group_std_mean": 0.2502582728862762,
"signal/accuracy_reward/group_zero_std_frac": 0.325,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.098431396484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.098431396484375,
"signal/advantage_abs_mean": 0.13896718621253967,
"signal/advantage_pre_scale_abs_mean": 0.13896718621253967,
"signal/advantage_pre_scale_std": 0.20438657999038695,
"signal/advantage_std": 0.20438657999038695,
"signal/brier_reward/centered_abs_mean": 0.1961173176765442,
"signal/brier_reward/group_std_mean": 0.24390378594398499,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024514664709568024,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024514664709568024,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09459523856639862,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12374730557203292,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011824404820799828,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011824404820799828,
"signal/format_reward/centered_abs_mean": 0.039300537109375,
"signal/format_reward/group_std_mean": 0.0625513531267643,
"signal/format_reward/group_zero_std_frac": 0.778125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0196502685546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0196502685546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022663983050733804,
"signal/frontier_aurc_reward/group_std_mean": 0.0036506312899291515,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.056852849316783e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.056852849316783e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18296231627464293,
"signal/frontier_coverage_1/group_std_mean": 0.24793100357055664,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_10/centered_abs_mean": 0.18296231627464293,
"signal/frontier_coverage_10/group_std_mean": 0.24793100357055664,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_15/centered_abs_mean": 0.18296231627464293,
"signal/frontier_coverage_15/group_std_mean": 0.24793100357055664,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_20/centered_abs_mean": 0.18296231627464293,
"signal/frontier_coverage_20/group_std_mean": 0.24793100357055664,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_25/centered_abs_mean": 0.18296231627464293,
"signal/frontier_coverage_25/group_std_mean": 0.24793100357055664,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_5/centered_abs_mean": 0.18296231627464293,
"signal/frontier_coverage_5/group_std_mean": 0.24793100357055664,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032750254031270742,
"signal/frontier_ece_reward/centered_abs_mean": 0.06804275140166283,
"signal/frontier_ece_reward/group_std_mean": 0.08398929536342621,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008505343925207853,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008505343925207853,
"step": 35
},
{
"calibration/aurc": 0.4637473590255504,
"calibration/batch_distribution_entropy": 0.9011286403654379,
"calibration/buffer_distribution_entropy": 0.7961353907846576,
"calibration/confidence_entropy": 0.5532398388702255,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.01019607843137255,
"calibration/coverage@20%": 0.02392156862745098,
"calibration/coverage@25%": 0.03686274509803922,
"calibration/coverage@30%": 0.054509803921568636,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.13673163536509028,
"calibration/mean_confidence": 0.485751228767752,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00703125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 953.2,
"completions/mean_length": 145.0564453125,
"completions/mean_terminated_length": 135.19951477050782,
"completions/min_length": 31.6,
"completions/min_terminated_length": 31.6,
"epoch": 0.128,
"grad_norm": 0.017669327557086945,
"learning_rate": 1e-06,
"loss": 0.0129,
"num_tokens": 134974389.0,
"reward": 0.923030960559845,
"reward_std": 0.1417643427848816,
"rewards/accuracy_reward": 0.4427734375,
"rewards/brier_reward": 0.7313620209693908,
"rewards/confidence_uniqueness_reward": 0.8564581751823426,
"rewards/format_reward": 0.9859375,
"rewards/frontier_aurc_reward": -0.003509230772033334,
"rewards/frontier_coverage_1": 0.08638581186532975,
"rewards/frontier_coverage_10": 0.08638581186532975,
"rewards/frontier_coverage_15": 0.08638581186532975,
"rewards/frontier_coverage_20": 0.08638581186532975,
"rewards/frontier_coverage_25": 0.08638581186532975,
"rewards/frontier_coverage_5": 0.08638581186532975,
"rewards/frontier_ece_reward": 0.007863593101501466,
"signal/accuracy_reward/centered_abs_mean": 0.16705322265625,
"signal/accuracy_reward/group_std_mean": 0.21422863900661468,
"signal/accuracy_reward/group_zero_std_frac": 0.415625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083526611328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.083526611328125,
"signal/advantage_abs_mean": 0.11018433421850204,
"signal/advantage_pre_scale_abs_mean": 0.11018433421850204,
"signal/advantage_pre_scale_std": 0.16467654705047607,
"signal/advantage_std": 0.16467654705047607,
"signal/brier_reward/centered_abs_mean": 0.1866983711719513,
"signal/brier_reward/group_std_mean": 0.23212920725345612,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02333729639649391,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02333729639649391,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0719639778137207,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09171251058578492,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008995497226715088,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008995497226715088,
"signal/format_reward/centered_abs_mean": 0.01739501953125,
"signal/format_reward/group_std_mean": 0.028746084496378898,
"signal/format_reward/group_zero_std_frac": 0.89375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008697509765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008697509765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017647896660491825,
"signal/frontier_aurc_reward/group_std_mean": 0.002844266314059496,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1589733771397734e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1589733771397734e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22426398992538452,
"signal/frontier_coverage_1/group_std_mean": 0.2852416396141052,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_10/centered_abs_mean": 0.22426398992538452,
"signal/frontier_coverage_10/group_std_mean": 0.2852416396141052,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_15/centered_abs_mean": 0.22426398992538452,
"signal/frontier_coverage_15/group_std_mean": 0.2852416396141052,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_20/centered_abs_mean": 0.22426398992538452,
"signal/frontier_coverage_20/group_std_mean": 0.2852416396141052,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_25/centered_abs_mean": 0.22426398992538452,
"signal/frontier_coverage_25/group_std_mean": 0.2852416396141052,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_5/centered_abs_mean": 0.22426398992538452,
"signal/frontier_coverage_5/group_std_mean": 0.2852416396141052,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004014325235038996,
"signal/frontier_ece_reward/centered_abs_mean": 0.05270521864295006,
"signal/frontier_ece_reward/group_std_mean": 0.0674271434545517,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006588152330368757,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006588152330368757,
"step": 40
},
{
"calibration/aurc": 0.27175397742115304,
"calibration/batch_distribution_entropy": 0.9232081653181499,
"calibration/buffer_distribution_entropy": 0.8419158033678219,
"calibration/confidence_entropy": 0.5273831934401841,
"calibration/coverage@0%": 0.012164204650829395,
"calibration/coverage@1%": 0.012164204650829395,
"calibration/coverage@10%": 0.07876753597900463,
"calibration/coverage@15%": 0.19833483371483523,
"calibration/coverage@20%": 0.3504713065890895,
"calibration/coverage@25%": 0.46684294476954935,
"calibration/coverage@30%": 0.606419362713756,
"calibration/coverage@5%": 0.035723291128458054,
"calibration/ece": 0.19987627269719255,
"calibration/mean_confidence": 0.44355403988022324,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003515625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 870.0,
"completions/mean_length": 143.75546875,
"completions/mean_terminated_length": 138.84595947265626,
"completions/min_length": 42.2,
"completions/min_terminated_length": 42.2,
"epoch": 0.144,
"grad_norm": 0.054179396480321884,
"learning_rate": 1e-06,
"loss": 0.0089,
"num_tokens": 151396877.0,
"reward": 0.9702192068099975,
"reward_std": 0.13579329252243041,
"rewards/accuracy_reward": 0.54228515625,
"rewards/brier_reward": 0.737048614025116,
"rewards/confidence_uniqueness_reward": 0.863895833492279,
"rewards/format_reward": 0.9849609375,
"rewards/frontier_aurc_reward": -0.0027967089787125587,
"rewards/frontier_coverage_1": 0.03856944553554058,
"rewards/frontier_coverage_10": 0.03856944553554058,
"rewards/frontier_coverage_15": 0.03856944553554058,
"rewards/frontier_coverage_20": 0.03856944553554058,
"rewards/frontier_coverage_25": 0.03856944553554058,
"rewards/frontier_coverage_5": 0.03856944553554058,
"rewards/frontier_ece_reward": 0.01908651553094387,
"signal/accuracy_reward/centered_abs_mean": 0.164288330078125,
"signal/accuracy_reward/group_std_mean": 0.21441585719585418,
"signal/accuracy_reward/group_zero_std_frac": 0.403125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0821441650390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0821441650390625,
"signal/advantage_abs_mean": 0.10403890758752823,
"signal/advantage_pre_scale_abs_mean": 0.10403890758752823,
"signal/advantage_pre_scale_std": 0.16093845069408416,
"signal/advantage_std": 0.16093845069408416,
"signal/brier_reward/centered_abs_mean": 0.18338664174079894,
"signal/brier_reward/group_std_mean": 0.22795365154743194,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022923330217599867,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022923330217599867,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06989959329366684,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09106495976448059,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008737449161708355,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008737449161708355,
"signal/format_reward/centered_abs_mean": 0.0210205078125,
"signal/format_reward/group_std_mean": 0.03324367478489876,
"signal/format_reward/group_zero_std_frac": 0.88125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01051025390625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01051025390625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014914550818502903,
"signal/frontier_aurc_reward/group_std_mean": 0.0024216063786298035,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6697046632762066e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6697046632762066e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2453473687171936,
"signal/frontier_coverage_1/group_std_mean": 0.3083998620510101,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_10/centered_abs_mean": 0.2453473687171936,
"signal/frontier_coverage_10/group_std_mean": 0.3083998620510101,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_15/centered_abs_mean": 0.2453473687171936,
"signal/frontier_coverage_15/group_std_mean": 0.3083998620510101,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_20/centered_abs_mean": 0.2453473687171936,
"signal/frontier_coverage_20/group_std_mean": 0.3083998620510101,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_25/centered_abs_mean": 0.2453473687171936,
"signal/frontier_coverage_25/group_std_mean": 0.3083998620510101,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_5/centered_abs_mean": 0.2453473687171936,
"signal/frontier_coverage_5/group_std_mean": 0.3083998620510101,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004391717724502087,
"signal/frontier_ece_reward/centered_abs_mean": 0.042443787306547166,
"signal/frontier_ece_reward/group_std_mean": 0.05635495781898499,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005305473413318396,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005305473413318396,
"step": 45
},
{
"calibration/aurc": 0.403125271003182,
"calibration/batch_distribution_entropy": 0.9391103501505299,
"calibration/buffer_distribution_entropy": 0.875463691913424,
"calibration/confidence_entropy": 0.5222249702517436,
"calibration/coverage@0%": 0.002834008097165992,
"calibration/coverage@1%": 0.002834008097165992,
"calibration/coverage@10%": 0.002834008097165992,
"calibration/coverage@15%": 0.023523663269579782,
"calibration/coverage@20%": 0.04461407067364151,
"calibration/coverage@25%": 0.0984996427720886,
"calibration/coverage@30%": 0.19978814939292627,
"calibration/coverage@5%": 0.002834008097165992,
"calibration/ece": 0.10212839080221536,
"calibration/mean_confidence": 0.4431201023375003,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0041015625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 918.6,
"completions/mean_length": 149.158203125,
"completions/mean_terminated_length": 143.44497680664062,
"completions/min_length": 50.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.16,
"grad_norm": 0.07966148853302002,
"learning_rate": 1e-06,
"loss": 0.0104,
"num_tokens": 167945185.0,
"reward": 0.9424496412277221,
"reward_std": 0.14887651801109314,
"rewards/accuracy_reward": 0.48271484375,
"rewards/brier_reward": 0.7433403611183167,
"rewards/confidence_uniqueness_reward": 0.8616537690162659,
"rewards/format_reward": 0.978125,
"rewards/frontier_aurc_reward": -0.0028730600606650114,
"rewards/frontier_coverage_1": 0.08680228143930435,
"rewards/frontier_coverage_10": 0.08680228143930435,
"rewards/frontier_coverage_15": 0.08680228143930435,
"rewards/frontier_coverage_20": 0.08680228143930435,
"rewards/frontier_coverage_25": 0.08680228143930435,
"rewards/frontier_coverage_5": 0.08680228143930435,
"rewards/frontier_ece_reward": 0.01707436852157116,
"signal/accuracy_reward/centered_abs_mean": 0.175335693359375,
"signal/accuracy_reward/group_std_mean": 0.21820703744888306,
"signal/accuracy_reward/group_zero_std_frac": 0.43125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0876678466796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0876678466796875,
"signal/advantage_abs_mean": 0.11476020514965057,
"signal/advantage_pre_scale_abs_mean": 0.11476020514965057,
"signal/advantage_pre_scale_std": 0.17932912409305574,
"signal/advantage_std": 0.17932912409305574,
"signal/brier_reward/centered_abs_mean": 0.18417735695838927,
"signal/brier_reward/group_std_mean": 0.2291133314371109,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02302216961979866,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02302216961979866,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07359530031681061,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10155714750289917,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009199412539601326,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009199412539601326,
"signal/format_reward/centered_abs_mean": 0.03203125,
"signal/format_reward/group_std_mean": 0.05196922719478607,
"signal/format_reward/group_zero_std_frac": 0.809375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016015625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016015625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001706640375778079,
"signal/frontier_aurc_reward/group_std_mean": 0.002641899697482586,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0548862559953706e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0548862559953706e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.24131617248058318,
"signal/frontier_coverage_1/group_std_mean": 0.30335493087768556,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_10/centered_abs_mean": 0.24131617248058318,
"signal/frontier_coverage_10/group_std_mean": 0.30335493087768556,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_15/centered_abs_mean": 0.24131617248058318,
"signal/frontier_coverage_15/group_std_mean": 0.30335493087768556,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_20/centered_abs_mean": 0.24131617248058318,
"signal/frontier_coverage_20/group_std_mean": 0.30335493087768556,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_25/centered_abs_mean": 0.24131617248058318,
"signal/frontier_coverage_25/group_std_mean": 0.30335493087768556,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_5/centered_abs_mean": 0.24131617248058318,
"signal/frontier_coverage_5/group_std_mean": 0.30335493087768556,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004319559410214424,
"signal/frontier_ece_reward/centered_abs_mean": 0.040847336500883104,
"signal/frontier_ece_reward/group_std_mean": 0.05442367494106293,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005105917062610388,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005105917062610388,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.6254440846908731,
"eval_calibration/batch_distribution_entropy": 0.8821632107470848,
"eval_calibration/buffer_distribution_entropy": 0.8901940690847239,
"eval_calibration/confidence_entropy": 0.520183064421665,
"eval_calibration/coverage@0%": 0.0,
"eval_calibration/coverage@1%": 0.0,
"eval_calibration/coverage@10%": 0.0,
"eval_calibration/coverage@15%": 0.0,
"eval_calibration/coverage@20%": 0.041666666666666664,
"eval_calibration/coverage@25%": 0.041666666666666664,
"eval_calibration/coverage@30%": 0.058333333333333334,
"eval_calibration/coverage@5%": 0.0,
"eval_calibration/ece": 0.28160651881720433,
"eval_calibration/mean_confidence": 0.46711323924731185,
"eval_completions/clipped_ratio": 0.004108297413793094,
"eval_completions/max_length": 939.5,
"eval_completions/max_terminated_length": 341.0,
"eval_completions/mean_length": 156.55832290649414,
"eval_completions/mean_terminated_length": 150.87627792358398,
"eval_completions/min_length": 69.0,
"eval_completions/min_terminated_length": 69.0,
"eval_loss": 0.0,
"eval_num_tokens": 167945185.0,
"eval_reward": 0.8764741569757462,
"eval_reward_std": 0.2708371505141258,
"eval_rewards/accuracy_reward": 0.353515625,
"eval_rewards/brier_reward": 0.752171978354454,
"eval_rewards/confidence_uniqueness_reward": 0.7996502369642258,
"eval_rewards/format_reward": 0.96875,
"eval_rewards/frontier_aurc_reward": -0.003503879823256284,
"eval_rewards/frontier_coverage_1": 0.18414541706442833,
"eval_rewards/frontier_coverage_10": 0.18414541706442833,
"eval_rewards/frontier_coverage_15": 0.18414541706442833,
"eval_rewards/frontier_coverage_20": 0.18414541706442833,
"eval_rewards/frontier_coverage_25": 0.18414541706442833,
"eval_rewards/frontier_coverage_5": 0.18414541706442833,
"eval_rewards/frontier_ece_reward": 0.01319264032645151,
"eval_runtime": 37.2237,
"eval_samples_per_second": 13.432,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4461669921875,
"eval_signal/accuracy_reward/group_std_mean": 0.47892439365386963,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22308349609375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22308349609375,
"eval_signal/advantage_abs_mean": 0.21810520812869072,
"eval_signal/advantage_pre_scale_abs_mean": 0.21810520812869072,
"eval_signal/advantage_pre_scale_std": 0.2705560587346554,
"eval_signal/advantage_std": 0.2705560587346554,
"eval_signal/brier_reward/centered_abs_mean": 0.21390501782298088,
"eval_signal/brier_reward/group_std_mean": 0.2747611552476883,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02673812722787261,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02673812722787261,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1026211753487587,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15684263966977596,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012827646918594837,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012827646918594837,
"eval_signal/format_reward/centered_abs_mean": 0.058837890625,
"eval_signal/format_reward/group_std_mean": 0.13523541949689388,
"eval_signal/format_reward/group_zero_std_frac": 0.375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0294189453125,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0294189453125,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002764371281955391,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0045166065683588386,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.948224341205787e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.948224341205787e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3214282989501953,
"eval_signal/frontier_coverage_1/group_std_mean": 0.394027441740036,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3214282989501953,
"eval_signal/frontier_coverage_10/group_std_mean": 0.394027441740036,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3214282989501953,
"eval_signal/frontier_coverage_15/group_std_mean": 0.394027441740036,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3214282989501953,
"eval_signal/frontier_coverage_20/group_std_mean": 0.394027441740036,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3214282989501953,
"eval_signal/frontier_coverage_25/group_std_mean": 0.394027441740036,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3214282989501953,
"eval_signal/frontier_coverage_5/group_std_mean": 0.394027441740036,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005753566394560039,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.049655829556286335,
"eval_signal/frontier_ece_reward/group_std_mean": 0.07456529140472412,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006206978694535792,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006206978694535792,
"eval_steps_per_second": 0.107,
"step": 50
},
{
"epoch": 0.16,
"step": 50,
"train_probe_calibration/aurc": 0.34953725528411633,
"train_probe_calibration/batch_distribution_entropy": 0.9158039775330977,
"train_probe_calibration/buffer_distribution_entropy": 0.891133958747135,
"train_probe_calibration/confidence_entropy": 0.5133234399655757,
"train_probe_calibration/coverage@0%": 0.11164314516129031,
"train_probe_calibration/coverage@1%": 0.11164314516129031,
"train_probe_calibration/coverage@10%": 0.11164314516129031,
"train_probe_calibration/coverage@15%": 0.18220766129032256,
"train_probe_calibration/coverage@20%": 0.2537802419354839,
"train_probe_calibration/coverage@25%": 0.2850302419354839,
"train_probe_calibration/coverage@30%": 0.3631552419354839,
"train_probe_calibration/coverage@5%": 0.11164314516129031,
"train_probe_calibration/ece": 0.1722202620967742,
"train_probe_calibration/mean_confidence": 0.4648311491935484,
"train_probe_completions/clipped_ratio": 0.008216594827586216,
"train_probe_completions/max_length": 1455.25,
"train_probe_completions/max_terminated_length": 755.0,
"train_probe_completions/mean_length": 164.17631912231445,
"train_probe_completions/mean_terminated_length": 152.7919807434082,
"train_probe_completions/min_length": 71.0,
"train_probe_completions/min_terminated_length": 71.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 167945185.0,
"train_probe_reward": 0.9423353224992752,
"train_probe_reward_std": 0.27149440348148346,
"train_probe_rewards/accuracy_reward": 0.4921875,
"train_probe_rewards/brier_reward": 0.7489275336265564,
"train_probe_rewards/confidence_uniqueness_reward": 0.8245857506990433,
"train_probe_rewards/format_reward": 0.974609375,
"train_probe_rewards/frontier_aurc_reward": -0.0028590288711711764,
"train_probe_rewards/frontier_coverage_1": 0.08961892500519753,
"train_probe_rewards/frontier_coverage_10": 0.08961892500519753,
"train_probe_rewards/frontier_coverage_15": 0.08961892500519753,
"train_probe_rewards/frontier_coverage_20": 0.08961892500519753,
"train_probe_rewards/frontier_coverage_25": 0.08961892500519753,
"train_probe_rewards/frontier_coverage_5": 0.08961892500519753,
"train_probe_rewards/frontier_ece_reward": 0.02139047277159989,
"train_probe_runtime": 54.1297,
"train_probe_samples_per_second": 9.237,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.489990234375,
"train_probe_signal/accuracy_reward/group_std_mean": 0.5028149038553238,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2449951171875,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2449951171875,
"train_probe_signal/advantage_abs_mean": 0.23267249390482903,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.23267249390482903,
"train_probe_signal/advantage_pre_scale_std": 0.27059199661016464,
"train_probe_signal/advantage_std": 0.27059199661016464,
"train_probe_signal/brier_reward/centered_abs_mean": 0.2169180065393448,
"train_probe_signal/brier_reward/group_std_mean": 0.2719731330871582,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0271147508174181,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0271147508174181,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.08825866505503654,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.14233380556106567,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011032333131879568,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011032333131879568,
"train_probe_signal/format_reward/centered_abs_mean": 0.0484619140625,
"train_probe_signal/format_reward/group_std_mean": 0.1234525553882122,
"train_probe_signal/format_reward/group_zero_std_frac": 0.375,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02423095703125,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.02423095703125,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002652477065566927,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0041865811217576265,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.747933689941419e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.747933689941419e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3308027759194374,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.424383707344532,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3308027759194374,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.424383707344532,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3308027759194374,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.424383707344532,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3308027759194374,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.424383707344532,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3308027759194374,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.424383707344532,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3308027759194374,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.424383707344532,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005921369651332498,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.051902798004448414,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.07279590144753456,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006487849750556052,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006487849750556052,
"train_probe_steps_per_second": 0.074
},
{
"calibration/aurc": 0.3712372844316617,
"calibration/batch_distribution_entropy": 0.9609159777516597,
"calibration/buffer_distribution_entropy": 0.8971900490580224,
"calibration/confidence_entropy": 0.49239175054637546,
"calibration/coverage@0%": 0.002510460251046025,
"calibration/coverage@1%": 0.002510460251046025,
"calibration/coverage@10%": 0.002510460251046025,
"calibration/coverage@15%": 0.002510460251046025,
"calibration/coverage@20%": 0.002510460251046025,
"calibration/coverage@25%": 0.04713520670135028,
"calibration/coverage@30%": 0.26879748917877694,
"calibration/coverage@5%": 0.002510460251046025,
"calibration/ece": 0.16573751140661525,
"calibration/mean_confidence": 0.5205409540211041,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006640625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1219.2,
"completions/mean_length": 160.66025390625,
"completions/mean_terminated_length": 151.46548461914062,
"completions/min_length": 46.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.176,
"grad_norm": 0.06872954219579697,
"learning_rate": 1e-06,
"loss": 0.0122,
"num_tokens": 184827466.0,
"reward": 0.9402350187301636,
"reward_std": 0.14623880088329316,
"rewards/accuracy_reward": 0.48759765625,
"rewards/brier_reward": 0.7364905476570129,
"rewards/confidence_uniqueness_reward": 0.8593945026397705,
"rewards/format_reward": 0.9693359375,
"rewards/frontier_aurc_reward": -0.0029026484582573174,
"rewards/frontier_coverage_1": 0.09159794300794602,
"rewards/frontier_coverage_10": 0.09159794300794602,
"rewards/frontier_coverage_15": 0.09159794300794602,
"rewards/frontier_coverage_20": 0.09159794300794602,
"rewards/frontier_coverage_25": 0.09159794300794602,
"rewards/frontier_coverage_5": 0.09159794300794602,
"rewards/frontier_ece_reward": 0.019975333102047445,
"signal/accuracy_reward/centered_abs_mean": 0.145709228515625,
"signal/accuracy_reward/group_std_mean": 0.19187160730361938,
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0728546142578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0728546142578125,
"signal/advantage_abs_mean": 0.1101900115609169,
"signal/advantage_pre_scale_abs_mean": 0.1101900115609169,
"signal/advantage_pre_scale_std": 0.17681396007537842,
"signal/advantage_std": 0.17681396007537842,
"signal/brier_reward/centered_abs_mean": 0.19099677503108978,
"signal/brier_reward/group_std_mean": 0.23669040203094482,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023874596878886222,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023874596878886222,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07248903661966324,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09891549348831177,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009061129577457906,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009061129577457906,
"signal/format_reward/centered_abs_mean": 0.0377197265625,
"signal/format_reward/group_std_mean": 0.05772824138402939,
"signal/format_reward/group_zero_std_frac": 0.8,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01885986328125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01885986328125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021433203713968397,
"signal/frontier_aurc_reward/group_std_mean": 0.0032778474967926742,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8365434011211616e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8365434011211616e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22156096398830413,
"signal/frontier_coverage_1/group_std_mean": 0.282322096824646,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_10/centered_abs_mean": 0.22156096398830413,
"signal/frontier_coverage_10/group_std_mean": 0.282322096824646,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_15/centered_abs_mean": 0.22156096398830413,
"signal/frontier_coverage_15/group_std_mean": 0.282322096824646,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_20/centered_abs_mean": 0.22156096398830413,
"signal/frontier_coverage_20/group_std_mean": 0.282322096824646,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_25/centered_abs_mean": 0.22156096398830413,
"signal/frontier_coverage_25/group_std_mean": 0.282322096824646,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_5/centered_abs_mean": 0.22156096398830413,
"signal/frontier_coverage_5/group_std_mean": 0.282322096824646,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003965941350907087,
"signal/frontier_ece_reward/centered_abs_mean": 0.04264579936861992,
"signal/frontier_ece_reward/group_std_mean": 0.05547093003988266,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00533072492107749,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00533072492107749,
"step": 55
},
{
"calibration/aurc": 0.35738801713319707,
"calibration/batch_distribution_entropy": 0.9222789410563648,
"calibration/buffer_distribution_entropy": 0.9062810656530844,
"calibration/confidence_entropy": 0.4314712566952025,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.004733727810650888,
"calibration/coverage@15%": 0.010794333871256948,
"calibration/coverage@20%": 0.08998625306317615,
"calibration/coverage@25%": 0.2524840938888444,
"calibration/coverage@30%": 0.3501611604120648,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.129347005564316,
"calibration/mean_confidence": 0.5669145772975883,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0068359375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 990.2,
"completions/mean_length": 165.06435546875,
"completions/mean_terminated_length": 155.6231201171875,
"completions/min_length": 49.4,
"completions/min_terminated_length": 49.4,
"epoch": 0.192,
"grad_norm": 0.06620907038450241,
"learning_rate": 1e-06,
"loss": 0.013,
"num_tokens": 201332541.0,
"reward": 0.9339034557342529,
"reward_std": 0.15482064783573152,
"rewards/accuracy_reward": 0.4763671875,
"rewards/brier_reward": 0.7338881492614746,
"rewards/confidence_uniqueness_reward": 0.8472534418106079,
"rewards/format_reward": 0.966796875,
"rewards/frontier_aurc_reward": -0.0033207187429070474,
"rewards/frontier_coverage_1": 0.11109301298856736,
"rewards/frontier_coverage_10": 0.11109301298856736,
"rewards/frontier_coverage_15": 0.11109301298856736,
"rewards/frontier_coverage_20": 0.11109301298856736,
"rewards/frontier_coverage_25": 0.11109301298856736,
"rewards/frontier_coverage_5": 0.11109301298856736,
"rewards/frontier_ece_reward": 0.022454247623682023,
"signal/accuracy_reward/centered_abs_mean": 0.14830322265625,
"signal/accuracy_reward/group_std_mean": 0.1960592031478882,
"signal/accuracy_reward/group_zero_std_frac": 0.44375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074151611328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074151611328125,
"signal/advantage_abs_mean": 0.11740224063396454,
"signal/advantage_pre_scale_abs_mean": 0.11740224063396454,
"signal/advantage_pre_scale_std": 0.19125163555145264,
"signal/advantage_std": 0.19125163555145264,
"signal/brier_reward/centered_abs_mean": 0.19765847623348237,
"signal/brier_reward/group_std_mean": 0.24587923288345337,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024707309529185296,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024707309529185296,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08868281245231628,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11674559116363525,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011085351556539535,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011085351556539535,
"signal/format_reward/centered_abs_mean": 0.04295654296875,
"signal/format_reward/group_std_mean": 0.06320370435714721,
"signal/format_reward/group_zero_std_frac": 0.790625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.021478271484375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.021478271484375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003104797238484025,
"signal/frontier_aurc_reward/group_std_mean": 0.004736031871289015,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.557586846407503e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.557586846407503e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20398018062114714,
"signal/frontier_coverage_1/group_std_mean": 0.265421861410141,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_10/centered_abs_mean": 0.20398018062114714,
"signal/frontier_coverage_10/group_std_mean": 0.265421861410141,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_15/centered_abs_mean": 0.20398018062114714,
"signal/frontier_coverage_15/group_std_mean": 0.265421861410141,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_20/centered_abs_mean": 0.20398018062114714,
"signal/frontier_coverage_20/group_std_mean": 0.265421861410141,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_25/centered_abs_mean": 0.20398018062114714,
"signal/frontier_coverage_25/group_std_mean": 0.265421861410141,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_5/centered_abs_mean": 0.20398018062114714,
"signal/frontier_coverage_5/group_std_mean": 0.265421861410141,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036512451246380807,
"signal/frontier_ece_reward/centered_abs_mean": 0.0457615964114666,
"signal/frontier_ece_reward/group_std_mean": 0.05886110737919807,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005720199551433325,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005720199551433325,
"step": 60
},
{
"calibration/aurc": 0.29643063095441663,
"calibration/batch_distribution_entropy": 0.9171947927143869,
"calibration/buffer_distribution_entropy": 0.9100448664327612,
"calibration/confidence_entropy": 0.4175287827982107,
"calibration/coverage@0%": 0.014481409001956946,
"calibration/coverage@1%": 0.014481409001956946,
"calibration/coverage@10%": 0.09575864838103554,
"calibration/coverage@15%": 0.2081677205074029,
"calibration/coverage@20%": 0.33050935136204285,
"calibration/coverage@25%": 0.44360224553076566,
"calibration/coverage@30%": 0.5479605902693373,
"calibration/coverage@5%": 0.01643835616438356,
"calibration/ece": 0.1376342119353494,
"calibration/mean_confidence": 0.5811524191443811,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0072265625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1322.8,
"completions/mean_length": 168.00712890625,
"completions/mean_terminated_length": 158.03106689453125,
"completions/min_length": 36.4,
"completions/min_terminated_length": 36.4,
"epoch": 0.208,
"grad_norm": 0.31033796072006226,
"learning_rate": 1e-06,
"loss": 0.0181,
"num_tokens": 218085158.0,
"reward": 0.9355008006095886,
"reward_std": 0.18306846916675568,
"rewards/accuracy_reward": 0.4966796875,
"rewards/brier_reward": 0.7259644269943237,
"rewards/confidence_uniqueness_reward": 0.8441673517227173,
"rewards/format_reward": 0.9564453125,
"rewards/frontier_aurc_reward": -0.002846927708014846,
"rewards/frontier_coverage_1": 0.09027891755104064,
"rewards/frontier_coverage_10": 0.09027891755104064,
"rewards/frontier_coverage_15": 0.09027891755104064,
"rewards/frontier_coverage_20": 0.09027891755104064,
"rewards/frontier_coverage_25": 0.09027891755104064,
"rewards/frontier_coverage_5": 0.09027891755104064,
"rewards/frontier_ece_reward": 0.02421446852385998,
"signal/accuracy_reward/centered_abs_mean": 0.179248046875,
"signal/accuracy_reward/group_std_mean": 0.22900831699371338,
"signal/accuracy_reward/group_zero_std_frac": 0.375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0896240234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0896240234375,
"signal/advantage_abs_mean": 0.1380708247423172,
"signal/advantage_pre_scale_abs_mean": 0.1380708247423172,
"signal/advantage_pre_scale_std": 0.22031235992908477,
"signal/advantage_std": 0.22031235992908477,
"signal/brier_reward/centered_abs_mean": 0.2126880943775177,
"signal/brier_reward/group_std_mean": 0.2632372736930847,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026586011797189713,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.026586011797189713,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09807170182466507,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1343176171183586,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012258962728083134,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012258962728083134,
"signal/format_reward/centered_abs_mean": 0.06129150390625,
"signal/format_reward/group_std_mean": 0.09367451593279838,
"signal/format_reward/group_zero_std_frac": 0.68125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.030645751953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.030645751953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029108581598848104,
"signal/frontier_aurc_reward/group_std_mean": 0.004493788257241249,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2104357746429744e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2104357746429744e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21561312973499297,
"signal/frontier_coverage_1/group_std_mean": 0.28178144097328184,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_10/centered_abs_mean": 0.21561312973499297,
"signal/frontier_coverage_10/group_std_mean": 0.28178144097328184,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_15/centered_abs_mean": 0.21561312973499297,
"signal/frontier_coverage_15/group_std_mean": 0.28178144097328184,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_20/centered_abs_mean": 0.21561312973499297,
"signal/frontier_coverage_20/group_std_mean": 0.28178144097328184,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_25/centered_abs_mean": 0.21561312973499297,
"signal/frontier_coverage_25/group_std_mean": 0.28178144097328184,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_5/centered_abs_mean": 0.21561312973499297,
"signal/frontier_coverage_5/group_std_mean": 0.28178144097328184,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003859474789351225,
"signal/frontier_ece_reward/centered_abs_mean": 0.043842590600252154,
"signal/frontier_ece_reward/group_std_mean": 0.056413907557725906,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005480323825031519,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005480323825031519,
"step": 65
},
{
"calibration/aurc": 0.46571920158011276,
"calibration/batch_distribution_entropy": 0.9178738567052317,
"calibration/buffer_distribution_entropy": 0.9163612020518315,
"calibration/confidence_entropy": 0.3947332396271469,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.027000000000000003,
"calibration/coverage@20%": 0.04491176470588236,
"calibration/coverage@25%": 0.17707901232241774,
"calibration/coverage@30%": 0.25417977729208274,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.19856830881468507,
"calibration/mean_confidence": 0.40029018840683345,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.065234375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1464.2,
"completions/mean_length": 242.76435546875,
"completions/mean_terminated_length": 152.45449523925782,
"completions/min_length": 36.2,
"completions/min_terminated_length": 36.2,
"epoch": 0.224,
"grad_norm": 2.9691953659057617,
"learning_rate": 1e-06,
"loss": 0.0767,
"num_tokens": 235724249.0,
"reward": 0.6787481069564819,
"reward_std": 0.34110564887523653,
"rewards/accuracy_reward": 0.33427734375,
"rewards/brier_reward": 0.5312727630138397,
"rewards/confidence_uniqueness_reward": 0.6269549608230591,
"rewards/format_reward": 0.7095703125,
"rewards/frontier_aurc_reward": -0.0023390050046145916,
"rewards/frontier_coverage_1": 0.09750215262174607,
"rewards/frontier_coverage_10": 0.09750215262174607,
"rewards/frontier_coverage_15": 0.09750215262174607,
"rewards/frontier_coverage_20": 0.09750215262174607,
"rewards/frontier_coverage_25": 0.09750215262174607,
"rewards/frontier_coverage_5": 0.09750215262174607,
"rewards/frontier_ece_reward": 0.012927726469933986,
"signal/accuracy_reward/centered_abs_mean": 0.188677978515625,
"signal/accuracy_reward/group_std_mean": 0.23931180834770202,
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0943389892578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0943389892578125,
"signal/advantage_abs_mean": 0.2798399984836578,
"signal/advantage_pre_scale_abs_mean": 0.2798399984836578,
"signal/advantage_pre_scale_std": 0.3601413905620575,
"signal/advantage_std": 0.3601413905620575,
"signal/brier_reward/centered_abs_mean": 0.29879134297370913,
"signal/brier_reward/group_std_mean": 0.3516451418399811,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03734891787171364,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03734891787171364,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2545173615217209,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3105557501316071,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03181467019021511,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03181467019021511,
"signal/format_reward/centered_abs_mean": 0.27607421875,
"signal/format_reward/group_std_mean": 0.3350002527236938,
"signal/format_reward/group_zero_std_frac": 0.159375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.138037109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.138037109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002703424310311675,
"signal/frontier_aurc_reward/group_std_mean": 0.004504935536533594,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.839129323954694e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.839129323954694e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2409254640340805,
"signal/frontier_coverage_1/group_std_mean": 0.3174335896968842,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_10/centered_abs_mean": 0.2409254640340805,
"signal/frontier_coverage_10/group_std_mean": 0.3174335896968842,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_15/centered_abs_mean": 0.2409254640340805,
"signal/frontier_coverage_15/group_std_mean": 0.3174335896968842,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_20/centered_abs_mean": 0.2409254640340805,
"signal/frontier_coverage_20/group_std_mean": 0.3174335896968842,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_25/centered_abs_mean": 0.2409254640340805,
"signal/frontier_coverage_25/group_std_mean": 0.3174335896968842,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_5/centered_abs_mean": 0.2409254640340805,
"signal/frontier_coverage_5/group_std_mean": 0.3174335896968842,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004312565550208092,
"signal/frontier_ece_reward/centered_abs_mean": 0.0325216319411993,
"signal/frontier_ece_reward/group_std_mean": 0.044718362390995026,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004065203992649913,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004065203992649913,
"step": 70
},
{
"calibration/aurc": 0.6752260460289098,
"calibration/batch_distribution_entropy": 0.8585007362093007,
"calibration/buffer_distribution_entropy": 0.921174766327918,
"calibration/confidence_entropy": 0.3302784425160673,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3135163470474769,
"calibration/mean_confidence": 0.4402562737874036,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.762109375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1532.2,
"completions/mean_length": 1249.8326171875,
"completions/mean_terminated_length": 387.5689727783203,
"completions/min_length": 3.6,
"completions/min_terminated_length": 3.6,
"epoch": 0.24,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0177,
"num_tokens": 263774215.0,
"reward": 0.0329528481233865,
"reward_std": 0.07450879570096731,
"rewards/accuracy_reward": 0.01083984375,
"rewards/brier_reward": 0.02677628120291047,
"rewards/confidence_uniqueness_reward": 0.03159494288265705,
"rewards/format_reward": 0.03857421875,
"rewards/frontier_aurc_reward": -0.0002577310428023338,
"rewards/frontier_coverage_1": 0.009148352436022833,
"rewards/frontier_coverage_10": 0.009148352436022833,
"rewards/frontier_coverage_15": 0.009148352436022833,
"rewards/frontier_coverage_20": 0.009148352436022833,
"rewards/frontier_coverage_25": 0.009148352436022833,
"rewards/frontier_coverage_5": 0.009148352436022833,
"rewards/frontier_ece_reward": -0.00022805376793257892,
"signal/accuracy_reward/centered_abs_mean": 0.017510986328125,
"signal/accuracy_reward/group_std_mean": 0.02862224280834198,
"signal/accuracy_reward/group_zero_std_frac": 0.896875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0087554931640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0087554931640625,
"signal/advantage_abs_mean": 0.04833462685346603,
"signal/advantage_pre_scale_abs_mean": 0.04833462685346603,
"signal/advantage_pre_scale_std": 0.10756354965269566,
"signal/advantage_std": 0.10756354965269566,
"signal/brier_reward/centered_abs_mean": 0.04109984996030107,
"signal/brier_reward/group_std_mean": 0.0659692483022809,
"signal/brier_reward/group_zero_std_frac": 0.71875,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005137481245037634,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.005137481245037634,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04622841775417328,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07011332884430885,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.7125,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00577855221927166,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00577855221927166,
"signal/format_reward/centered_abs_mean": 0.056903076171875,
"signal/format_reward/group_std_mean": 0.0884034713730216,
"signal/format_reward/group_zero_std_frac": 0.69375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0284515380859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0284515380859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0004303819587221369,
"signal/frontier_aurc_reward/group_std_mean": 0.0009567889268510043,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.69375,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.70383680901432e-06,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.70383680901432e-06,
"signal/frontier_coverage_1/centered_abs_mean": 0.026687611715169625,
"signal/frontier_coverage_1/group_std_mean": 0.04987622057087719,
"signal/frontier_coverage_1/group_zero_std_frac": 0.7125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_10/centered_abs_mean": 0.026687611715169625,
"signal/frontier_coverage_10/group_std_mean": 0.04987622057087719,
"signal/frontier_coverage_10/group_zero_std_frac": 0.7125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_15/centered_abs_mean": 0.026687611715169625,
"signal/frontier_coverage_15/group_std_mean": 0.04987622057087719,
"signal/frontier_coverage_15/group_zero_std_frac": 0.7125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_20/centered_abs_mean": 0.026687611715169625,
"signal/frontier_coverage_20/group_std_mean": 0.04987622057087719,
"signal/frontier_coverage_20/group_zero_std_frac": 0.7125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_25/centered_abs_mean": 0.026687611715169625,
"signal/frontier_coverage_25/group_std_mean": 0.04987622057087719,
"signal/frontier_coverage_25/group_zero_std_frac": 0.7125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_5/centered_abs_mean": 0.026687611715169625,
"signal/frontier_coverage_5/group_std_mean": 0.04987622057087719,
"signal/frontier_coverage_5/group_zero_std_frac": 0.7125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00047770824676263146,
"signal/frontier_ece_reward/centered_abs_mean": 0.00249036728637293,
"signal/frontier_ece_reward/group_std_mean": 0.005515742604620755,
"signal/frontier_ece_reward/group_zero_std_frac": 0.69375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00031129591079661625,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00031129591079661625,
"step": 75
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.88017578125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1533.2,
"completions/mean_length": 1385.08759765625,
"completions/mean_terminated_length": 275.3055450439453,
"completions/min_length": 2.2,
"completions/min_terminated_length": 2.2,
"epoch": 0.256,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 293012328.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 80
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.96201171875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1481.6,
"completions/mean_length": 1489.7912109375,
"completions/mean_terminated_length": 334.21091918945314,
"completions/min_length": 2.8,
"completions/min_terminated_length": 2.8,
"epoch": 0.272,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 323233486.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 85
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.98466796875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1513.6,
"completions/mean_length": 1520.5732421875,
"completions/mean_terminated_length": 536.7734497070312,
"completions/min_length": 9.4,
"completions/min_terminated_length": 9.4,
"epoch": 0.288,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 353762332.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.98818359375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1392.0,
"completions/mean_length": 1522.99775390625,
"completions/mean_terminated_length": 435.09649353027345,
"completions/min_length": 8.4,
"completions/min_terminated_length": 8.4,
"epoch": 0.304,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 384287781.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 95
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9904296875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1392.0,
"completions/mean_length": 1525.1625,
"completions/mean_terminated_length": 416.81488647460935,
"completions/min_length": 12.0,
"completions/min_terminated_length": 12.0,
"epoch": 0.32,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 414994149.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 100
},
{
"epoch": 0.32,
"eval_completions/clipped_ratio": 0.998046875,
"eval_completions/max_length": 1536.0,
"eval_completions/max_terminated_length": 231.5,
"eval_completions/mean_length": 1534.80859375,
"eval_completions/mean_terminated_length": 231.5,
"eval_completions/min_length": 1383.5,
"eval_completions/min_terminated_length": 231.5,
"eval_loss": 0.0,
"eval_num_tokens": 414994149.0,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward": 0.0,
"eval_rewards/brier_reward": 0.0,
"eval_rewards/confidence_uniqueness_reward": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/frontier_aurc_reward": 0.0,
"eval_rewards/frontier_coverage_1": 0.0,
"eval_rewards/frontier_coverage_10": 0.0,
"eval_rewards/frontier_coverage_15": 0.0,
"eval_rewards/frontier_coverage_20": 0.0,
"eval_rewards/frontier_coverage_25": 0.0,
"eval_rewards/frontier_coverage_5": 0.0,
"eval_rewards/frontier_ece_reward": 0.0,
"eval_runtime": 74.8012,
"eval_samples_per_second": 6.684,
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/group_std_mean": 0.0,
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/advantage_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_std": 0.0,
"eval_signal/advantage_std": 0.0,
"eval_signal/brier_reward/centered_abs_mean": 0.0,
"eval_signal/brier_reward/group_std_mean": 0.0,
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.053,
"step": 100
},
{
"epoch": 0.32,
"step": 100,
"train_probe_completions/clipped_ratio": 0.994140625,
"train_probe_completions/max_length": 1536.0,
"train_probe_completions/max_terminated_length": 567.5,
"train_probe_completions/mean_length": 1531.43359375,
"train_probe_completions/mean_terminated_length": 567.5,
"train_probe_completions/min_length": 951.5,
"train_probe_completions/min_terminated_length": 567.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 414994149.0,
"train_probe_reward": 0.0,
"train_probe_reward_std": 0.0,
"train_probe_rewards/accuracy_reward": 0.0,
"train_probe_rewards/brier_reward": 0.0,
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
"train_probe_rewards/format_reward": 0.0,
"train_probe_rewards/frontier_aurc_reward": 0.0,
"train_probe_rewards/frontier_coverage_1": 0.0,
"train_probe_rewards/frontier_coverage_10": 0.0,
"train_probe_rewards/frontier_coverage_15": 0.0,
"train_probe_rewards/frontier_coverage_20": 0.0,
"train_probe_rewards/frontier_coverage_25": 0.0,
"train_probe_rewards/frontier_coverage_5": 0.0,
"train_probe_rewards/frontier_ece_reward": 0.0,
"train_probe_runtime": 73.546,
"train_probe_samples_per_second": 6.798,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/advantage_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_std": 0.0,
"train_probe_signal/advantage_std": 0.0,
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/group_std_mean": 0.0,
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"train_probe_steps_per_second": 0.054
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99091796875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1429.0,
"completions/mean_length": 1526.96376953125,
"completions/mean_terminated_length": 541.3440856933594,
"completions/min_length": 21.6,
"completions/min_terminated_length": 21.6,
"epoch": 0.336,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 445352690.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 105
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.991015625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1341.0,
"completions/mean_length": 1526.5474609375,
"completions/mean_terminated_length": 485.99510498046874,
"completions/min_length": 24.6,
"completions/min_terminated_length": 24.6,
"epoch": 0.352,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 476244952.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 110
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99072265625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1438.6,
"completions/mean_length": 1526.39052734375,
"completions/mean_terminated_length": 499.4287414550781,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"epoch": 0.368,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 506940663.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 115
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9912109375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1377.2,
"completions/mean_length": 1526.468359375,
"completions/mean_terminated_length": 455.91710205078124,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"epoch": 0.384,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 537428211.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 120
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99150390625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1416.2,
"completions/mean_length": 1527.345703125,
"completions/mean_terminated_length": 484.37660522460936,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"epoch": 0.4,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 568104679.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 125
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.98994140625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1400.4,
"completions/mean_length": 1525.31591796875,
"completions/mean_terminated_length": 484.96845703125,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.416,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 598605098.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 130
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.990234375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1392.6,
"completions/mean_length": 1525.57548828125,
"completions/mean_terminated_length": 465.9883239746094,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"epoch": 0.432,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 629241327.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 135
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99169921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1312.0,
"completions/mean_length": 1527.14140625,
"completions/mean_terminated_length": 479.5002502441406,
"completions/min_length": 21.4,
"completions/min_terminated_length": 21.4,
"epoch": 0.448,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 659832055.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 140
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9908203125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1349.6,
"completions/mean_length": 1526.08798828125,
"completions/mean_terminated_length": 470.441455078125,
"completions/min_length": 30.8,
"completions/min_terminated_length": 30.8,
"epoch": 0.464,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 690630012.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 145
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.992578125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1409.6,
"completions/mean_length": 1527.8091796875,
"completions/mean_terminated_length": 447.0835357666016,
"completions/min_length": 19.8,
"completions/min_terminated_length": 19.8,
"epoch": 0.48,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 721322810.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 150
},
{
"epoch": 0.48,
"eval_completions/clipped_ratio": 0.9876751077586207,
"eval_completions/max_length": 1536.0,
"eval_completions/max_terminated_length": 476.0,
"eval_completions/mean_length": 1521.2329711914062,
"eval_completions/mean_terminated_length": 361.83333587646484,
"eval_completions/min_length": 303.0,
"eval_completions/min_terminated_length": 303.0,
"eval_loss": 0.0,
"eval_num_tokens": 721322810.0,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward": 0.0,
"eval_rewards/brier_reward": 0.0,
"eval_rewards/confidence_uniqueness_reward": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/frontier_aurc_reward": 0.0,
"eval_rewards/frontier_coverage_1": 0.0,
"eval_rewards/frontier_coverage_10": 0.0,
"eval_rewards/frontier_coverage_15": 0.0,
"eval_rewards/frontier_coverage_20": 0.0,
"eval_rewards/frontier_coverage_25": 0.0,
"eval_rewards/frontier_coverage_5": 0.0,
"eval_rewards/frontier_ece_reward": 0.0,
"eval_runtime": 74.8404,
"eval_samples_per_second": 6.681,
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/group_std_mean": 0.0,
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/advantage_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_std": 0.0,
"eval_signal/advantage_std": 0.0,
"eval_signal/brier_reward/centered_abs_mean": 0.0,
"eval_signal/brier_reward/group_std_mean": 0.0,
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.053,
"step": 150
},
{
"epoch": 0.48,
"step": 150,
"train_probe_completions/clipped_ratio": 0.994140625,
"train_probe_completions/max_length": 1536.0,
"train_probe_completions/max_terminated_length": 483.25,
"train_probe_completions/mean_length": 1532.205078125,
"train_probe_completions/mean_terminated_length": 443.875,
"train_probe_completions/min_length": 1172.5,
"train_probe_completions/min_terminated_length": 404.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 721322810.0,
"train_probe_reward": 0.0,
"train_probe_reward_std": 0.0,
"train_probe_rewards/accuracy_reward": 0.0,
"train_probe_rewards/brier_reward": 0.0,
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
"train_probe_rewards/format_reward": 0.0,
"train_probe_rewards/frontier_aurc_reward": 0.0,
"train_probe_rewards/frontier_coverage_1": 0.0,
"train_probe_rewards/frontier_coverage_10": 0.0,
"train_probe_rewards/frontier_coverage_15": 0.0,
"train_probe_rewards/frontier_coverage_20": 0.0,
"train_probe_rewards/frontier_coverage_25": 0.0,
"train_probe_rewards/frontier_coverage_5": 0.0,
"train_probe_rewards/frontier_ece_reward": 0.0,
"train_probe_runtime": 72.8578,
"train_probe_samples_per_second": 6.863,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/advantage_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_std": 0.0,
"train_probe_signal/advantage_std": 0.0,
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/group_std_mean": 0.0,
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"train_probe_steps_per_second": 0.055
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99287109375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1102.8,
"completions/mean_length": 1527.67587890625,
"completions/mean_terminated_length": 357.08160400390625,
"completions/min_length": 28.2,
"completions/min_terminated_length": 28.2,
"epoch": 0.496,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 752274051.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 155
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.992578125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1313.8,
"completions/mean_length": 1528.16044921875,
"completions/mean_terminated_length": 473.29002075195314,
"completions/min_length": 33.6,
"completions/min_terminated_length": 33.6,
"epoch": 0.512,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 783068078.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 160
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.990625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1420.2,
"completions/mean_length": 1525.24150390625,
"completions/mean_terminated_length": 396.59959106445314,
"completions/min_length": 22.6,
"completions/min_terminated_length": 22.6,
"epoch": 0.528,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 813716087.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 165
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1404.4,
"completions/mean_length": 1527.6833984375,
"completions/mean_terminated_length": 487.17767944335935,
"completions/min_length": 21.4,
"completions/min_terminated_length": 21.4,
"epoch": 0.544,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 844523149.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 170
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9919921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1442.6,
"completions/mean_length": 1527.085546875,
"completions/mean_terminated_length": 423.56866455078125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"epoch": 0.56,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 874981913.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 175
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99130859375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1212.0,
"completions/mean_length": 1526.4693359375,
"completions/mean_terminated_length": 425.4572448730469,
"completions/min_length": 17.8,
"completions/min_terminated_length": 17.8,
"epoch": 0.576,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 905799583.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 180
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99072265625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1469.0,
"completions/mean_length": 1526.5130859375,
"completions/mean_terminated_length": 515.5063232421875,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"epoch": 0.592,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 936598789.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 185
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99072265625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1396.6,
"completions/mean_length": 1525.85185546875,
"completions/mean_terminated_length": 447.1800231933594,
"completions/min_length": 31.2,
"completions/min_terminated_length": 31.2,
"epoch": 0.608,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 967223000.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 190
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99111328125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1440.8,
"completions/mean_length": 1527.06923828125,
"completions/mean_terminated_length": 509.2791809082031,
"completions/min_length": 13.0,
"completions/min_terminated_length": 13.0,
"epoch": 0.624,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 998204093.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 195
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99365234375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1235.8,
"completions/mean_length": 1528.4357421875,
"completions/mean_terminated_length": 320.70795288085935,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.64,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1029197963.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 200
},
{
"epoch": 0.64,
"eval_completions/clipped_ratio": 0.9900323275862069,
"eval_completions/max_length": 1536.0,
"eval_completions/max_terminated_length": 315.75,
"eval_completions/mean_length": 1524.6064758300781,
"eval_completions/mean_terminated_length": 265.1666717529297,
"eval_completions/min_length": 557.0,
"eval_completions/min_terminated_length": 173.0,
"eval_loss": 0.0,
"eval_num_tokens": 1029197963.0,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward": 0.0,
"eval_rewards/brier_reward": 0.0,
"eval_rewards/confidence_uniqueness_reward": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/frontier_aurc_reward": 0.0,
"eval_rewards/frontier_coverage_1": 0.0,
"eval_rewards/frontier_coverage_10": 0.0,
"eval_rewards/frontier_coverage_15": 0.0,
"eval_rewards/frontier_coverage_20": 0.0,
"eval_rewards/frontier_coverage_25": 0.0,
"eval_rewards/frontier_coverage_5": 0.0,
"eval_rewards/frontier_ece_reward": 0.0,
"eval_runtime": 74.0994,
"eval_samples_per_second": 6.748,
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/group_std_mean": 0.0,
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/advantage_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_std": 0.0,
"eval_signal/advantage_std": 0.0,
"eval_signal/brier_reward/centered_abs_mean": 0.0,
"eval_signal/brier_reward/group_std_mean": 0.0,
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.054,
"step": 200
},
{
"epoch": 0.64,
"step": 200,
"train_probe_completions/clipped_ratio": 0.9900323275862069,
"train_probe_completions/max_length": 1536.0,
"train_probe_completions/max_terminated_length": 444.75,
"train_probe_completions/mean_length": 1524.4424743652344,
"train_probe_completions/mean_terminated_length": 341.5,
"train_probe_completions/min_length": 238.25,
"train_probe_completions/min_terminated_length": 238.25,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1029197963.0,
"train_probe_reward": 0.0,
"train_probe_reward_std": 0.0,
"train_probe_rewards/accuracy_reward": 0.0,
"train_probe_rewards/brier_reward": 0.0,
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
"train_probe_rewards/format_reward": 0.0,
"train_probe_rewards/frontier_aurc_reward": 0.0,
"train_probe_rewards/frontier_coverage_1": 0.0,
"train_probe_rewards/frontier_coverage_10": 0.0,
"train_probe_rewards/frontier_coverage_15": 0.0,
"train_probe_rewards/frontier_coverage_20": 0.0,
"train_probe_rewards/frontier_coverage_25": 0.0,
"train_probe_rewards/frontier_coverage_5": 0.0,
"train_probe_rewards/frontier_ece_reward": 0.0,
"train_probe_runtime": 70.6463,
"train_probe_samples_per_second": 7.078,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/advantage_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_std": 0.0,
"train_probe_signal/advantage_std": 0.0,
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/group_std_mean": 0.0,
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"train_probe_steps_per_second": 0.057
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99091796875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1409.4,
"completions/mean_length": 1526.26650390625,
"completions/mean_terminated_length": 446.5516021728516,
"completions/min_length": 15.4,
"completions/min_terminated_length": 15.4,
"epoch": 0.656,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1059683476.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 205
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99345703125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1140.0,
"completions/mean_length": 1528.49189453125,
"completions/mean_terminated_length": 370.8636016845703,
"completions/min_length": 45.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.672,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1090248673.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 210
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99228515625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1345.6,
"completions/mean_length": 1527.9634765625,
"completions/mean_terminated_length": 482.64027709960936,
"completions/min_length": 23.2,
"completions/min_terminated_length": 23.2,
"epoch": 0.688,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1120848939.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 215
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99169921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1463.8,
"completions/mean_length": 1527.13515625,
"completions/mean_terminated_length": 466.1702087402344,
"completions/min_length": 21.6,
"completions/min_terminated_length": 21.6,
"epoch": 0.704,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1151352947.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 220
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9904296875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1260.2,
"completions/mean_length": 1525.028515625,
"completions/mean_terminated_length": 393.17090759277346,
"completions/min_length": 18.8,
"completions/min_terminated_length": 18.8,
"epoch": 0.72,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1181979095.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 225
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.992578125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1274.4,
"completions/mean_length": 1527.6798828125,
"completions/mean_terminated_length": 406.5520385742187,
"completions/min_length": 25.2,
"completions/min_terminated_length": 25.2,
"epoch": 0.736,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1212562121.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 230
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99248046875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1376.4,
"completions/mean_length": 1527.54853515625,
"completions/mean_terminated_length": 413.2706298828125,
"completions/min_length": 26.2,
"completions/min_terminated_length": 26.2,
"epoch": 0.752,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1243431418.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 235
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.991015625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1362.2,
"completions/mean_length": 1525.55625,
"completions/mean_terminated_length": 372.6120971679687,
"completions/min_length": 14.2,
"completions/min_terminated_length": 14.2,
"epoch": 0.768,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1273985818.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 240
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99306640625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1481.8,
"completions/mean_length": 1529.205078125,
"completions/mean_terminated_length": 549.9662841796875,
"completions/min_length": 33.8,
"completions/min_terminated_length": 33.8,
"epoch": 0.784,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1304819246.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 245
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99208984375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1298.6,
"completions/mean_length": 1526.48974609375,
"completions/mean_terminated_length": 337.9350891113281,
"completions/min_length": 26.8,
"completions/min_terminated_length": 26.8,
"epoch": 0.8,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1335461061.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 250
},
{
"epoch": 0.8,
"eval_completions/clipped_ratio": 0.990234375,
"eval_completions/max_length": 1536.0,
"eval_completions/max_terminated_length": 590.75,
"eval_completions/mean_length": 1527.345703125,
"eval_completions/mean_terminated_length": 491.875,
"eval_completions/min_length": 777.0,
"eval_completions/min_terminated_length": 393.0,
"eval_loss": 0.0,
"eval_num_tokens": 1335461061.0,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward": 0.0,
"eval_rewards/brier_reward": 0.0,
"eval_rewards/confidence_uniqueness_reward": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/frontier_aurc_reward": 0.0,
"eval_rewards/frontier_coverage_1": 0.0,
"eval_rewards/frontier_coverage_10": 0.0,
"eval_rewards/frontier_coverage_15": 0.0,
"eval_rewards/frontier_coverage_20": 0.0,
"eval_rewards/frontier_coverage_25": 0.0,
"eval_rewards/frontier_coverage_5": 0.0,
"eval_rewards/frontier_ece_reward": 0.0,
"eval_runtime": 73.9291,
"eval_samples_per_second": 6.763,
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/group_std_mean": 0.0,
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/advantage_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_std": 0.0,
"eval_signal/advantage_std": 0.0,
"eval_signal/brier_reward/centered_abs_mean": 0.0,
"eval_signal/brier_reward/group_std_mean": 0.0,
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.054,
"step": 250
},
{
"epoch": 0.8,
"step": 250,
"train_probe_completions/clipped_ratio": 0.9861260775862069,
"train_probe_completions/max_length": 1536.0,
"train_probe_completions/max_terminated_length": 655.75,
"train_probe_completions/mean_length": 1522.4951477050781,
"train_probe_completions/mean_terminated_length": 420.75000762939453,
"train_probe_completions/min_length": 643.0,
"train_probe_completions/min_terminated_length": 259.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1335461061.0,
"train_probe_reward": 0.0,
"train_probe_reward_std": 0.0,
"train_probe_rewards/accuracy_reward": 0.0,
"train_probe_rewards/brier_reward": 0.0,
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
"train_probe_rewards/format_reward": 0.0,
"train_probe_rewards/frontier_aurc_reward": 0.0,
"train_probe_rewards/frontier_coverage_1": 0.0,
"train_probe_rewards/frontier_coverage_10": 0.0,
"train_probe_rewards/frontier_coverage_15": 0.0,
"train_probe_rewards/frontier_coverage_20": 0.0,
"train_probe_rewards/frontier_coverage_25": 0.0,
"train_probe_rewards/frontier_coverage_5": 0.0,
"train_probe_rewards/frontier_ece_reward": 0.0,
"train_probe_runtime": 73.7012,
"train_probe_samples_per_second": 6.784,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/advantage_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_std": 0.0,
"train_probe_signal/advantage_std": 0.0,
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/group_std_mean": 0.0,
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"train_probe_steps_per_second": 0.054
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99208984375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1341.0,
"completions/mean_length": 1527.38896484375,
"completions/mean_terminated_length": 444.1418090820313,
"completions/min_length": 21.8,
"completions/min_terminated_length": 21.8,
"epoch": 0.816,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1366200692.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 255
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99091796875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1425.6,
"completions/mean_length": 1526.38564453125,
"completions/mean_terminated_length": 473.8236938476563,
"completions/min_length": 20.6,
"completions/min_terminated_length": 20.6,
"epoch": 0.832,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1396839233.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 260
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99111328125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1394.4,
"completions/mean_length": 1525.48974609375,
"completions/mean_terminated_length": 345.88324584960935,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"epoch": 0.848,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1427474616.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 265
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99072265625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1322.0,
"completions/mean_length": 1525.17509765625,
"completions/mean_terminated_length": 365.7697448730469,
"completions/min_length": 25.4,
"completions/min_terminated_length": 25.4,
"epoch": 0.864,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1458079225.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 270
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99296875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1370.4,
"completions/mean_length": 1528.1609375,
"completions/mean_terminated_length": 439.784326171875,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"epoch": 0.88,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1488874665.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 275
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.991015625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1472.0,
"completions/mean_length": 1526.5763671875,
"completions/mean_terminated_length": 484.0293884277344,
"completions/min_length": 18.6,
"completions/min_terminated_length": 18.6,
"epoch": 0.896,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1519617655.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 280
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9912109375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1358.6,
"completions/mean_length": 1526.38759765625,
"completions/mean_terminated_length": 452.73790283203124,
"completions/min_length": 30.6,
"completions/min_terminated_length": 30.6,
"epoch": 0.912,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1550299160.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 285
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1365.0,
"completions/mean_length": 1528.1560546875,
"completions/mean_terminated_length": 532.4930725097656,
"completions/min_length": 28.4,
"completions/min_terminated_length": 28.4,
"epoch": 0.928,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1580974294.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 290
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9904296875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1427.2,
"completions/mean_length": 1525.52470703125,
"completions/mean_terminated_length": 438.5868408203125,
"completions/min_length": 19.4,
"completions/min_terminated_length": 19.4,
"epoch": 0.944,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1611571091.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 295
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.98994140625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1269.6,
"completions/mean_length": 1524.29560546875,
"completions/mean_terminated_length": 369.2761505126953,
"completions/min_length": 7.4,
"completions/min_terminated_length": 7.4,
"epoch": 0.96,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1642120198.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 300
},
{
"epoch": 0.96,
"eval_completions/clipped_ratio": 0.9878771551724138,
"eval_completions/max_length": 1536.0,
"eval_completions/max_terminated_length": 676.0,
"eval_completions/mean_length": 1527.0393981933594,
"eval_completions/mean_terminated_length": 608.4583435058594,
"eval_completions/min_length": 946.0,
"eval_completions/min_terminated_length": 562.0,
"eval_loss": 0.0,
"eval_num_tokens": 1642120198.0,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward": 0.0,
"eval_rewards/brier_reward": 0.0,
"eval_rewards/confidence_uniqueness_reward": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/frontier_aurc_reward": 0.0,
"eval_rewards/frontier_coverage_1": 0.0,
"eval_rewards/frontier_coverage_10": 0.0,
"eval_rewards/frontier_coverage_15": 0.0,
"eval_rewards/frontier_coverage_20": 0.0,
"eval_rewards/frontier_coverage_25": 0.0,
"eval_rewards/frontier_coverage_5": 0.0,
"eval_rewards/frontier_ece_reward": 0.0,
"eval_runtime": 76.0588,
"eval_samples_per_second": 6.574,
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/group_std_mean": 0.0,
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/advantage_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
"eval_signal/advantage_pre_scale_std": 0.0,
"eval_signal/advantage_std": 0.0,
"eval_signal/brier_reward/centered_abs_mean": 0.0,
"eval_signal/brier_reward/group_std_mean": 0.0,
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.053,
"step": 300
},
{
"epoch": 0.96,
"step": 300,
"train_probe_completions/clipped_ratio": 0.9978448275862069,
"train_probe_completions/max_length": 1536.0,
"train_probe_completions/max_terminated_length": 18.25,
"train_probe_completions/mean_length": 1532.8469848632812,
"train_probe_completions/mean_terminated_length": 18.25,
"train_probe_completions/min_length": 1170.25,
"train_probe_completions/min_terminated_length": 18.25,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1642120198.0,
"train_probe_reward": 0.0,
"train_probe_reward_std": 0.0,
"train_probe_rewards/accuracy_reward": 0.0,
"train_probe_rewards/brier_reward": 0.0,
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
"train_probe_rewards/format_reward": 0.0,
"train_probe_rewards/frontier_aurc_reward": 0.0,
"train_probe_rewards/frontier_coverage_1": 0.0,
"train_probe_rewards/frontier_coverage_10": 0.0,
"train_probe_rewards/frontier_coverage_15": 0.0,
"train_probe_rewards/frontier_coverage_20": 0.0,
"train_probe_rewards/frontier_coverage_25": 0.0,
"train_probe_rewards/frontier_coverage_5": 0.0,
"train_probe_rewards/frontier_ece_reward": 0.0,
"train_probe_runtime": 72.6093,
"train_probe_samples_per_second": 6.886,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/advantage_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
"train_probe_signal/advantage_pre_scale_std": 0.0,
"train_probe_signal/advantage_std": 0.0,
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/group_std_mean": 0.0,
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"train_probe_steps_per_second": 0.055
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99326171875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1463.8,
"completions/mean_length": 1529.2005859375,
"completions/mean_terminated_length": 520.1449768066407,
"completions/min_length": 33.4,
"completions/min_terminated_length": 33.4,
"epoch": 0.976,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1672640332.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 305
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.99375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1262.0,
"completions/mean_length": 1529.35078125,
"completions/mean_terminated_length": 473.9320007324219,
"completions/min_length": 45.4,
"completions/min_terminated_length": 45.4,
"epoch": 0.992,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1703429364.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 310
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9893574617346939,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1243.0,
"completions/mean_length": 1523.6888427734375,
"completions/mean_terminated_length": 363.3791961669922,
"completions/min_length": 24.5,
"completions/min_terminated_length": 24.5,
"epoch": 0.9984,
"num_tokens": 1715682258.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward": 0.0,
"rewards/brier_reward": 0.0,
"rewards/confidence_uniqueness_reward": 0.0,
"rewards/format_reward": 0.0,
"rewards/frontier_aurc_reward": 0.0,
"rewards/frontier_coverage_1": 0.0,
"rewards/frontier_coverage_10": 0.0,
"rewards/frontier_coverage_15": 0.0,
"rewards/frontier_coverage_20": 0.0,
"rewards/frontier_coverage_25": 0.0,
"rewards/frontier_coverage_5": 0.0,
"rewards/frontier_ece_reward": 0.0,
"signal/accuracy_reward/centered_abs_mean": 0.0,
"signal/accuracy_reward/group_std_mean": 0.0,
"signal/accuracy_reward/group_zero_std_frac": 1.0,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
"signal/advantage_abs_mean": 0.0,
"signal/advantage_pre_scale_abs_mean": 0.0,
"signal/advantage_pre_scale_std": 0.0,
"signal/advantage_std": 0.0,
"signal/brier_reward/centered_abs_mean": 0.0,
"signal/brier_reward/group_std_mean": 0.0,
"signal/brier_reward/group_zero_std_frac": 1.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/group_std_mean": 0.0,
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
"signal/frontier_coverage_1/group_std_mean": 0.0,
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
"signal/frontier_coverage_10/group_std_mean": 0.0,
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
"signal/frontier_coverage_15/group_std_mean": 0.0,
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
"signal/frontier_coverage_20/group_std_mean": 0.0,
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
"signal/frontier_coverage_25/group_std_mean": 0.0,
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
"signal/frontier_coverage_5/group_std_mean": 0.0,
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
"signal/frontier_ece_reward/group_std_mean": 0.0,
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.007585880621217,
"train_runtime": 111863.6752,
"train_samples_per_second": 0.179,
"train_steps_per_second": 0.003
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1715682258,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}