{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5681354920214096, "calibration/batch_distribution_entropy": 0.6450350928927815, "calibration/confidence_entropy": 0.346740957452881, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.43446125614086684, "calibration/mean_confidence": 0.7908406375397601, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0361328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1503.4, "completions/mean_length": 271.28115234375, "completions/mean_terminated_length": 223.8628723144531, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.059467002749443054, "learning_rate": 3.1249999999999997e-07, "loss": 0.0934, "num_tokens": 17621951.0, "reward": 0.6730658292770386, "reward_std": 0.5045446038246155, "rewards/accuracy_reward": 0.27001953125, "rewards/brier_reward": 0.4092401027679443, "rewards/confidence_uniqueness_reward": 0.48412379026412966, "rewards/format_reward": 0.68173828125, "rewards/frontier_aurc_reward": 0.3416558563709259, "rewards/frontier_coverage_1": 0.3416558563709259, "rewards/frontier_coverage_10": 0.3416558563709259, "rewards/frontier_coverage_15": 0.3416558563709259, "rewards/frontier_coverage_20": 0.3416558563709259, "rewards/frontier_coverage_25": 0.3416558563709259, "rewards/frontier_coverage_5": 0.3416558563709259, "rewards/frontier_ece_reward": 0.3416558563709259, "signal/accuracy_reward/centered_abs_mean": 0.279132080078125, "signal/accuracy_reward/group_std_mean": 0.31931535005569456, "signal/accuracy_reward/group_zero_std_frac": 0.25625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1395660400390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1395660400390625, "signal/advantage_abs_mean": 0.43447349071502683, "signal/advantage_pre_scale_abs_mean": 0.43447349071502683, "signal/advantage_pre_scale_std": 0.5123933017253876, "signal/advantage_std": 0.5123933017253876, "signal/brier_reward/centered_abs_mean": 0.33928354978561404, "signal/brier_reward/group_std_mean": 0.38253386616706847, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.042410443723201754, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.042410443723201754, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2978093445301056, "signal/confidence_uniqueness_reward/group_std_mean": 0.34845100045204164, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0372261680662632, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0372261680662632, "signal/format_reward/centered_abs_mean": 0.404998779296875, "signal/format_reward/group_std_mean": 0.4546263098716736, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2024993896484375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2024993896484375, "signal/frontier_aurc_reward/centered_abs_mean": 0.31834944486618044, "signal/frontier_aurc_reward/group_std_mean": 0.36653432846069334, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_1/centered_abs_mean": 0.31834944486618044, "signal/frontier_coverage_1/group_std_mean": 0.36653432846069334, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_10/centered_abs_mean": 0.31834944486618044, "signal/frontier_coverage_10/group_std_mean": 0.36653432846069334, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_15/centered_abs_mean": 0.31834944486618044, "signal/frontier_coverage_15/group_std_mean": 0.36653432846069334, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_20/centered_abs_mean": 0.31834944486618044, "signal/frontier_coverage_20/group_std_mean": 0.36653432846069334, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_25/centered_abs_mean": 0.31834944486618044, "signal/frontier_coverage_25/group_std_mean": 0.36653432846069334, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_5/centered_abs_mean": 0.31834944486618044, "signal/frontier_coverage_5/group_std_mean": 0.36653432846069334, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00569845512509346, "signal/frontier_ece_reward/centered_abs_mean": 0.31834944486618044, "signal/frontier_ece_reward/group_std_mean": 0.36653432846069334, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.039793680608272555, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.039793680608272555, "step": 5 }, { "calibration/aurc": 0.5823592206951076, "calibration/batch_distribution_entropy": 0.6377635262826689, "calibration/confidence_entropy": 0.34316212043587685, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4711939179056281, "calibration/mean_confidence": 0.8033346823525754, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03837890625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1489.4, "completions/mean_length": 264.73369140625, "completions/mean_terminated_length": 214.02195739746094, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.03480805084109306, "learning_rate": 6.249999999999999e-07, "loss": 0.0952, "num_tokens": 35433176.0, "reward": 0.6785492658615112, "reward_std": 0.4799711525440216, "rewards/accuracy_reward": 0.25234375, "rewards/brier_reward": 0.4085344135761261, "rewards/confidence_uniqueness_reward": 0.506452476978302, "rewards/format_reward": 0.70908203125, "rewards/frontier_aurc_reward": 0.3334519624710083, "rewards/frontier_coverage_1": 0.3334519624710083, "rewards/frontier_coverage_10": 0.3334519624710083, "rewards/frontier_coverage_15": 0.3334519624710083, "rewards/frontier_coverage_20": 0.3334519624710083, "rewards/frontier_coverage_25": 0.3334519624710083, "rewards/frontier_coverage_5": 0.3334519624710083, "rewards/frontier_ece_reward": 0.3334519624710083, "signal/accuracy_reward/centered_abs_mean": 0.2619384765625, "signal/accuracy_reward/group_std_mean": 0.30939258337020875, "signal/accuracy_reward/group_zero_std_frac": 0.246875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13096923828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13096923828125, "signal/advantage_abs_mean": 0.4020266532897949, "signal/advantage_pre_scale_abs_mean": 0.4020266532897949, "signal/advantage_pre_scale_std": 0.48843042850494384, "signal/advantage_std": 0.48843042850494384, "signal/brier_reward/centered_abs_mean": 0.3226713418960571, "signal/brier_reward/group_std_mean": 0.3693849265575409, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04033391773700714, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04033391773700714, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2817148804664612, "signal/confidence_uniqueness_reward/group_std_mean": 0.3387665629386902, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521436005830765, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03521436005830765, "signal/format_reward/centered_abs_mean": 0.378582763671875, "signal/format_reward/group_std_mean": 0.43834707140922546, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1892913818359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1892913818359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.30297967195510866, "signal/frontier_aurc_reward/group_std_mean": 0.3533449411392212, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_1/centered_abs_mean": 0.30297967195510866, "signal/frontier_coverage_1/group_std_mean": 0.3533449411392212, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_10/centered_abs_mean": 0.30297967195510866, "signal/frontier_coverage_10/group_std_mean": 0.3533449411392212, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_15/centered_abs_mean": 0.30297967195510866, "signal/frontier_coverage_15/group_std_mean": 0.3533449411392212, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_20/centered_abs_mean": 0.30297967195510866, "signal/frontier_coverage_20/group_std_mean": 0.3533449411392212, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_25/centered_abs_mean": 0.30297967195510866, "signal/frontier_coverage_25/group_std_mean": 0.3533449411392212, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_5/centered_abs_mean": 0.30297967195510866, "signal/frontier_coverage_5/group_std_mean": 0.3533449411392212, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005423336289823055, "signal/frontier_ece_reward/centered_abs_mean": 0.30297967195510866, "signal/frontier_ece_reward/group_std_mean": 0.3533449411392212, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03787245899438858, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03787245899438858, "step": 10 }, { "calibration/aurc": 0.485502347974659, "calibration/batch_distribution_entropy": 0.6338946369559008, "calibration/buffer_distribution_entropy": 0.6565034331851883, "calibration/confidence_entropy": 0.3398889343388115, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.38130162024695624, "calibration/mean_confidence": 0.8044045301334914, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02001953125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1488.4, "completions/mean_length": 213.66611328125, "completions/mean_terminated_length": 186.77418518066406, "completions/min_length": 5.2, "completions/min_terminated_length": 5.2, "epoch": 0.048, "grad_norm": 0.09747687727212906, "learning_rate": 9.374999999999999e-07, "loss": 0.0577, "num_tokens": 52669853.0, "reward": 0.8086728811264038, "reward_std": 0.3855405569076538, "rewards/accuracy_reward": 0.32490234375, "rewards/brier_reward": 0.505929458141327, "rewards/confidence_uniqueness_reward": 0.6144041776657104, "rewards/format_reward": 0.84814453125, "rewards/frontier_aurc_reward": 0.3234916229732335, "rewards/frontier_coverage_1": 0.33721864223480225, "rewards/frontier_coverage_10": 0.33721864223480225, "rewards/frontier_coverage_15": 0.33721864223480225, "rewards/frontier_coverage_20": 0.33721864223480225, "rewards/frontier_coverage_25": 0.33721864223480225, "rewards/frontier_coverage_5": 0.33721864223480225, "rewards/frontier_ece_reward": 0.32079982459545137, "signal/accuracy_reward/centered_abs_mean": 0.239251708984375, "signal/accuracy_reward/group_std_mean": 0.2890691041946411, "signal/accuracy_reward/group_zero_std_frac": 0.28125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1196258544921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1196258544921875, "signal/advantage_abs_mean": 0.3007605969905853, "signal/advantage_pre_scale_abs_mean": 0.3007605969905853, "signal/advantage_pre_scale_std": 0.3971730887889862, "signal/advantage_std": 0.3971730887889862, "signal/brier_reward/centered_abs_mean": 0.28599911630153657, "signal/brier_reward/group_std_mean": 0.34063884019851687, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03574988953769207, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03574988953769207, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21475785672664643, "signal/confidence_uniqueness_reward/group_std_mean": 0.2769153594970703, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026844732090830804, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.026844732090830804, "signal/format_reward/centered_abs_mean": 0.224554443359375, "signal/format_reward/group_std_mean": 0.31889126896858216, "signal/format_reward/group_zero_std_frac": 0.065625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1122772216796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1122772216796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.23223379356786608, "signal/frontier_aurc_reward/group_std_mean": 0.27599835190922023, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004156984848668799, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004156984848668799, "signal/frontier_coverage_1/centered_abs_mean": 0.25027269423007964, "signal/frontier_coverage_1/group_std_mean": 0.3035570979118347, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_10/centered_abs_mean": 0.25027269423007964, "signal/frontier_coverage_10/group_std_mean": 0.3035570979118347, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_15/centered_abs_mean": 0.25027269423007964, "signal/frontier_coverage_15/group_std_mean": 0.3035570979118347, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_20/centered_abs_mean": 0.25027269423007964, "signal/frontier_coverage_20/group_std_mean": 0.3035570979118347, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_25/centered_abs_mean": 0.25027269423007964, "signal/frontier_coverage_25/group_std_mean": 0.3035570979118347, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_5/centered_abs_mean": 0.25027269423007964, "signal/frontier_coverage_5/group_std_mean": 0.3035570979118347, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004479881143197417, "signal/frontier_ece_reward/centered_abs_mean": 0.25118278712034225, "signal/frontier_ece_reward/group_std_mean": 0.2993951976299286, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03139784839004278, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03139784839004278, "step": 15 }, { "calibration/aurc": 0.4613156372239649, "calibration/batch_distribution_entropy": 0.6885188701822018, "calibration/buffer_distribution_entropy": 0.6525163906375744, "calibration/confidence_entropy": 0.3612115527122598, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.33557730079398584, "calibration/mean_confidence": 0.7816876164808539, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01103515625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1425.8, "completions/mean_length": 162.32021484375, "completions/mean_terminated_length": 146.99488830566406, "completions/min_length": 1.8, "completions/min_terminated_length": 1.8, "epoch": 0.064, "grad_norm": 0.028669551014900208, "learning_rate": 1e-06, "loss": 0.0214, "num_tokens": 69250412.0, "reward": 0.8064048051834106, "reward_std": 0.26577826142311095, "rewards/accuracy_reward": 0.37060546875, "rewards/brier_reward": 0.5703646183013916, "rewards/confidence_uniqueness_reward": 0.6865696787834168, "rewards/format_reward": 0.92236328125, "rewards/frontier_aurc_reward": -0.006006188318133354, "rewards/frontier_coverage_1": 0.05366070494055748, "rewards/frontier_coverage_10": 0.05366070494055748, "rewards/frontier_coverage_15": 0.05366070494055748, "rewards/frontier_coverage_20": 0.05366070494055748, "rewards/frontier_coverage_25": 0.05366070494055748, "rewards/frontier_coverage_5": 0.05366070494055748, "rewards/frontier_ece_reward": -0.022815992310643195, "signal/accuracy_reward/centered_abs_mean": 0.232647705078125, "signal/accuracy_reward/group_std_mean": 0.2829224646091461, "signal/accuracy_reward/group_zero_std_frac": 0.290625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1163238525390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1163238525390625, "signal/advantage_abs_mean": 0.20600511133670807, "signal/advantage_pre_scale_abs_mean": 0.20600511133670807, "signal/advantage_pre_scale_std": 0.2877360999584198, "signal/advantage_std": 0.2877360999584198, "signal/brier_reward/centered_abs_mean": 0.2589739263057709, "signal/brier_reward/group_std_mean": 0.31490403413772583, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03237174078822136, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03237174078822136, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.16161151528358458, "signal/confidence_uniqueness_reward/group_std_mean": 0.20550169944763183, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020201439410448073, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020201439410448073, "signal/format_reward/centered_abs_mean": 0.111956787109375, "signal/format_reward/group_std_mean": 0.17785735428333282, "signal/format_reward/group_zero_std_frac": 0.3625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0559783935546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0559783935546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.005545902531594038, "signal/frontier_aurc_reward/group_std_mean": 0.0080027237534523, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.927165228873491e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.927165228873491e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09711904674768448, "signal/frontier_coverage_1/group_std_mean": 0.15192094445228577, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_10/centered_abs_mean": 0.09711904674768448, "signal/frontier_coverage_10/group_std_mean": 0.15192094445228577, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_15/centered_abs_mean": 0.09711904674768448, "signal/frontier_coverage_15/group_std_mean": 0.15192094445228577, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_20/centered_abs_mean": 0.09711904674768448, "signal/frontier_coverage_20/group_std_mean": 0.15192094445228577, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_25/centered_abs_mean": 0.09711904674768448, "signal/frontier_coverage_25/group_std_mean": 0.15192094445228577, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_5/centered_abs_mean": 0.09711904674768448, "signal/frontier_coverage_5/group_std_mean": 0.15192094445228577, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017384308390319347, "signal/frontier_ece_reward/centered_abs_mean": 0.1027738630771637, "signal/frontier_ece_reward/group_std_mean": 0.12494452595710755, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012846732884645462, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012846732884645462, "step": 20 }, { "calibration/aurc": 0.5838333717990067, "calibration/batch_distribution_entropy": 0.7631321019020263, "calibration/buffer_distribution_entropy": 0.6752091827121237, "calibration/confidence_entropy": 0.427675346192575, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3880188298739684, "calibration/mean_confidence": 0.743169744444465, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0072265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1216.2, "completions/mean_length": 133.6181640625, "completions/mean_terminated_length": 123.41706695556641, "completions/min_length": 1.6, "completions/min_terminated_length": 1.6, "epoch": 0.08, "grad_norm": 0.10266012698411942, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 85551814.0, "reward": 0.86251140832901, "reward_std": 0.2213844656944275, "rewards/accuracy_reward": 0.41123046875, "rewards/brier_reward": 0.6216847538948059, "rewards/confidence_uniqueness_reward": 0.7656373262405396, "rewards/format_reward": 0.96044921875, "rewards/frontier_aurc_reward": -0.005164883844554424, "rewards/frontier_coverage_1": 0.04586975798010826, "rewards/frontier_coverage_10": 0.04586975798010826, "rewards/frontier_coverage_15": 0.04586975798010826, "rewards/frontier_coverage_20": 0.04586975798010826, "rewards/frontier_coverage_25": 0.04586975798010826, "rewards/frontier_coverage_5": 0.04586975798010826, "rewards/frontier_ece_reward": -0.01262117656879127, "signal/accuracy_reward/centered_abs_mean": 0.220159912109375, "signal/accuracy_reward/group_std_mean": 0.27431103587150574, "signal/accuracy_reward/group_zero_std_frac": 0.284375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1100799560546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1100799560546875, "signal/advantage_abs_mean": 0.1713700234889984, "signal/advantage_pre_scale_abs_mean": 0.1713700234889984, "signal/advantage_pre_scale_std": 0.2442230075597763, "signal/advantage_std": 0.2442230075597763, "signal/brier_reward/centered_abs_mean": 0.23227280676364898, "signal/brier_reward/group_std_mean": 0.28580942153930666, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029034100845456122, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.029034100845456122, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11165157109498977, "signal/confidence_uniqueness_reward/group_std_mean": 0.14749074429273606, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013956446386873721, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013956446386873721, "signal/format_reward/centered_abs_mean": 0.058660888671875, "signal/format_reward/group_std_mean": 0.0945195160806179, "signal/format_reward/group_zero_std_frac": 0.65, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0293304443359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0293304443359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.004352754168212414, "signal/frontier_aurc_reward/group_std_mean": 0.006485749594867229, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.79142945248168e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.79142945248168e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11262711882591248, "signal/frontier_coverage_1/group_std_mean": 0.171766459941864, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_10/centered_abs_mean": 0.11262711882591248, "signal/frontier_coverage_10/group_std_mean": 0.171766459941864, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_15/centered_abs_mean": 0.11262711882591248, "signal/frontier_coverage_15/group_std_mean": 0.171766459941864, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_20/centered_abs_mean": 0.11262711882591248, "signal/frontier_coverage_20/group_std_mean": 0.171766459941864, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_25/centered_abs_mean": 0.11262711882591248, "signal/frontier_coverage_25/group_std_mean": 0.171766459941864, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_5/centered_abs_mean": 0.11262711882591248, "signal/frontier_coverage_5/group_std_mean": 0.171766459941864, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020160253159701826, "signal/frontier_ece_reward/centered_abs_mean": 0.09184739738702774, "signal/frontier_ece_reward/group_std_mean": 0.11263370960950851, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011480924673378468, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011480924673378468, "step": 25 }, { "calibration/aurc": 0.5599529783433583, "calibration/batch_distribution_entropy": 0.8180527560081053, "calibration/buffer_distribution_entropy": 0.7099110970064041, "calibration/confidence_entropy": 0.5050432578890813, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2930707046300085, "calibration/mean_confidence": 0.6695370928113038, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0107421875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1365.6, "completions/mean_length": 139.62607421875, "completions/mean_terminated_length": 124.46354522705079, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.096, "grad_norm": 0.3033556342124939, "learning_rate": 1e-06, "loss": 0.0144, "num_tokens": 102026193.0, "reward": 0.8633492946624756, "reward_std": 0.22220987677574158, "rewards/accuracy_reward": 0.414453125, "rewards/brier_reward": 0.6465227484703064, "rewards/confidence_uniqueness_reward": 0.7718554854393005, "rewards/format_reward": 0.9494140625, "rewards/frontier_aurc_reward": -0.004374950844794512, "rewards/frontier_coverage_1": 0.04679640345275402, "rewards/frontier_coverage_10": 0.04679640345275402, "rewards/frontier_coverage_15": 0.04679640345275402, "rewards/frontier_coverage_20": 0.04679640345275402, "rewards/frontier_coverage_25": 0.04679640345275402, "rewards/frontier_coverage_5": 0.04679640345275402, "rewards/frontier_ece_reward": -0.006633454142138362, "signal/accuracy_reward/centered_abs_mean": 0.2164794921875, "signal/accuracy_reward/group_std_mean": 0.2656884342432022, "signal/accuracy_reward/group_zero_std_frac": 0.31875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10823974609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10823974609375, "signal/advantage_abs_mean": 0.1734051823616028, "signal/advantage_pre_scale_abs_mean": 0.1734051823616028, "signal/advantage_pre_scale_std": 0.25319576263427734, "signal/advantage_std": 0.25319576263427734, "signal/brier_reward/centered_abs_mean": 0.22392457127571105, "signal/brier_reward/group_std_mean": 0.27409825921058656, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027990571409463882, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.027990571409463882, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1212245300412178, "signal/confidence_uniqueness_reward/group_std_mean": 0.1597886711359024, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015153066255152225, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015153066255152225, "signal/format_reward/centered_abs_mean": 0.076416015625, "signal/format_reward/group_std_mean": 0.11220613121986389, "signal/format_reward/group_zero_std_frac": 0.6375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0382080078125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0382080078125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030647643376141786, "signal/frontier_aurc_reward/group_std_mean": 0.004675971809774637, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4859279043739664e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4859279043739664e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13435963690280914, "signal/frontier_coverage_1/group_std_mean": 0.19624820053577424, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_10/centered_abs_mean": 0.13435963690280914, "signal/frontier_coverage_10/group_std_mean": 0.19624820053577424, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_15/centered_abs_mean": 0.13435963690280914, "signal/frontier_coverage_15/group_std_mean": 0.19624820053577424, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_20/centered_abs_mean": 0.13435963690280914, "signal/frontier_coverage_20/group_std_mean": 0.19624820053577424, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_25/centered_abs_mean": 0.13435963690280914, "signal/frontier_coverage_25/group_std_mean": 0.19624820053577424, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_5/centered_abs_mean": 0.13435963690280914, "signal/frontier_coverage_5/group_std_mean": 0.19624820053577424, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024050374049693346, "signal/frontier_ece_reward/centered_abs_mean": 0.08245499283075333, "signal/frontier_ece_reward/group_std_mean": 0.10052948445081711, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010306874103844166, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010306874103844166, "step": 30 }, { "calibration/aurc": 0.43093254462035047, "calibration/batch_distribution_entropy": 0.8765791511807105, "calibration/buffer_distribution_entropy": 0.7493890212367624, "calibration/confidence_entropy": 0.5332302085205971, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.12442684122316035, "calibration/coverage@5%": 0.0, "calibration/ece": 0.18847028436719626, "calibration/mean_confidence": 0.6052631008223052, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009765625, "completions/max_length": 1536.0, "completions/max_terminated_length": 974.0, "completions/mean_length": 140.3001953125, "completions/mean_terminated_length": 126.54303894042968, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.112, "grad_norm": 0.0450531542301178, "learning_rate": 1e-06, "loss": 0.0177, "num_tokens": 118572339.0, "reward": 0.9136639595031738, "reward_std": 0.17941873669624328, "rewards/accuracy_reward": 0.4568359375, "rewards/brier_reward": 0.7062686562538147, "rewards/confidence_uniqueness_reward": 0.8257040023803711, "rewards/format_reward": 0.97333984375, "rewards/frontier_aurc_reward": -0.0037020944990217687, "rewards/frontier_coverage_1": 0.05813024044036865, "rewards/frontier_coverage_10": 0.05813024044036865, "rewards/frontier_coverage_15": 0.05813024044036865, "rewards/frontier_coverage_20": 0.05813024044036865, "rewards/frontier_coverage_25": 0.05813024044036865, "rewards/frontier_coverage_5": 0.05813024044036865, "rewards/frontier_ece_reward": 0.007220498844981193, "signal/accuracy_reward/centered_abs_mean": 0.19686279296875, "signal/accuracy_reward/group_std_mean": 0.2502582728862762, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.098431396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.098431396484375, "signal/advantage_abs_mean": 0.13896718621253967, "signal/advantage_pre_scale_abs_mean": 0.13896718621253967, "signal/advantage_pre_scale_std": 0.20438657999038695, "signal/advantage_std": 0.20438657999038695, "signal/brier_reward/centered_abs_mean": 0.1961173176765442, "signal/brier_reward/group_std_mean": 0.24390378594398499, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024514664709568024, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024514664709568024, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09459523856639862, "signal/confidence_uniqueness_reward/group_std_mean": 0.12374730557203292, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011824404820799828, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011824404820799828, "signal/format_reward/centered_abs_mean": 0.039300537109375, "signal/format_reward/group_std_mean": 0.0625513531267643, "signal/format_reward/group_zero_std_frac": 0.778125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0196502685546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0196502685546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022663983050733804, "signal/frontier_aurc_reward/group_std_mean": 0.0036506312899291515, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.056852849316783e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.056852849316783e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18296231627464293, "signal/frontier_coverage_1/group_std_mean": 0.24793100357055664, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_10/centered_abs_mean": 0.18296231627464293, "signal/frontier_coverage_10/group_std_mean": 0.24793100357055664, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_15/centered_abs_mean": 0.18296231627464293, "signal/frontier_coverage_15/group_std_mean": 0.24793100357055664, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_20/centered_abs_mean": 0.18296231627464293, "signal/frontier_coverage_20/group_std_mean": 0.24793100357055664, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_25/centered_abs_mean": 0.18296231627464293, "signal/frontier_coverage_25/group_std_mean": 0.24793100357055664, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_5/centered_abs_mean": 0.18296231627464293, "signal/frontier_coverage_5/group_std_mean": 0.24793100357055664, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032750254031270742, "signal/frontier_ece_reward/centered_abs_mean": 0.06804275140166283, "signal/frontier_ece_reward/group_std_mean": 0.08398929536342621, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008505343925207853, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008505343925207853, "step": 35 }, { "calibration/aurc": 0.4637473590255504, "calibration/batch_distribution_entropy": 0.9011286403654379, "calibration/buffer_distribution_entropy": 0.7961353907846576, "calibration/confidence_entropy": 0.5532398388702255, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.01019607843137255, "calibration/coverage@20%": 0.02392156862745098, "calibration/coverage@25%": 0.03686274509803922, "calibration/coverage@30%": 0.054509803921568636, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13673163536509028, "calibration/mean_confidence": 0.485751228767752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00703125, "completions/max_length": 1536.0, "completions/max_terminated_length": 953.2, "completions/mean_length": 145.0564453125, "completions/mean_terminated_length": 135.19951477050782, "completions/min_length": 31.6, "completions/min_terminated_length": 31.6, "epoch": 0.128, "grad_norm": 0.017669327557086945, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 134974389.0, "reward": 0.923030960559845, "reward_std": 0.1417643427848816, "rewards/accuracy_reward": 0.4427734375, "rewards/brier_reward": 0.7313620209693908, "rewards/confidence_uniqueness_reward": 0.8564581751823426, "rewards/format_reward": 0.9859375, "rewards/frontier_aurc_reward": -0.003509230772033334, "rewards/frontier_coverage_1": 0.08638581186532975, "rewards/frontier_coverage_10": 0.08638581186532975, "rewards/frontier_coverage_15": 0.08638581186532975, "rewards/frontier_coverage_20": 0.08638581186532975, "rewards/frontier_coverage_25": 0.08638581186532975, "rewards/frontier_coverage_5": 0.08638581186532975, "rewards/frontier_ece_reward": 0.007863593101501466, "signal/accuracy_reward/centered_abs_mean": 0.16705322265625, "signal/accuracy_reward/group_std_mean": 0.21422863900661468, "signal/accuracy_reward/group_zero_std_frac": 0.415625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083526611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.083526611328125, "signal/advantage_abs_mean": 0.11018433421850204, "signal/advantage_pre_scale_abs_mean": 0.11018433421850204, "signal/advantage_pre_scale_std": 0.16467654705047607, "signal/advantage_std": 0.16467654705047607, "signal/brier_reward/centered_abs_mean": 0.1866983711719513, "signal/brier_reward/group_std_mean": 0.23212920725345612, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02333729639649391, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02333729639649391, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0719639778137207, "signal/confidence_uniqueness_reward/group_std_mean": 0.09171251058578492, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008995497226715088, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008995497226715088, "signal/format_reward/centered_abs_mean": 0.01739501953125, "signal/format_reward/group_std_mean": 0.028746084496378898, "signal/format_reward/group_zero_std_frac": 0.89375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008697509765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008697509765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017647896660491825, "signal/frontier_aurc_reward/group_std_mean": 0.002844266314059496, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1589733771397734e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1589733771397734e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22426398992538452, "signal/frontier_coverage_1/group_std_mean": 0.2852416396141052, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_10/centered_abs_mean": 0.22426398992538452, "signal/frontier_coverage_10/group_std_mean": 0.2852416396141052, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_15/centered_abs_mean": 0.22426398992538452, "signal/frontier_coverage_15/group_std_mean": 0.2852416396141052, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_20/centered_abs_mean": 0.22426398992538452, "signal/frontier_coverage_20/group_std_mean": 0.2852416396141052, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_25/centered_abs_mean": 0.22426398992538452, "signal/frontier_coverage_25/group_std_mean": 0.2852416396141052, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_5/centered_abs_mean": 0.22426398992538452, "signal/frontier_coverage_5/group_std_mean": 0.2852416396141052, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004014325235038996, "signal/frontier_ece_reward/centered_abs_mean": 0.05270521864295006, "signal/frontier_ece_reward/group_std_mean": 0.0674271434545517, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006588152330368757, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006588152330368757, "step": 40 }, { "calibration/aurc": 0.27175397742115304, "calibration/batch_distribution_entropy": 0.9232081653181499, "calibration/buffer_distribution_entropy": 0.8419158033678219, "calibration/confidence_entropy": 0.5273831934401841, "calibration/coverage@0%": 0.012164204650829395, "calibration/coverage@1%": 0.012164204650829395, "calibration/coverage@10%": 0.07876753597900463, "calibration/coverage@15%": 0.19833483371483523, "calibration/coverage@20%": 0.3504713065890895, "calibration/coverage@25%": 0.46684294476954935, "calibration/coverage@30%": 0.606419362713756, "calibration/coverage@5%": 0.035723291128458054, "calibration/ece": 0.19987627269719255, "calibration/mean_confidence": 0.44355403988022324, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003515625, "completions/max_length": 1536.0, "completions/max_terminated_length": 870.0, "completions/mean_length": 143.75546875, "completions/mean_terminated_length": 138.84595947265626, "completions/min_length": 42.2, "completions/min_terminated_length": 42.2, "epoch": 0.144, "grad_norm": 0.054179396480321884, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 151396877.0, "reward": 0.9702192068099975, "reward_std": 0.13579329252243041, "rewards/accuracy_reward": 0.54228515625, "rewards/brier_reward": 0.737048614025116, "rewards/confidence_uniqueness_reward": 0.863895833492279, "rewards/format_reward": 0.9849609375, "rewards/frontier_aurc_reward": -0.0027967089787125587, "rewards/frontier_coverage_1": 0.03856944553554058, "rewards/frontier_coverage_10": 0.03856944553554058, "rewards/frontier_coverage_15": 0.03856944553554058, "rewards/frontier_coverage_20": 0.03856944553554058, "rewards/frontier_coverage_25": 0.03856944553554058, "rewards/frontier_coverage_5": 0.03856944553554058, "rewards/frontier_ece_reward": 0.01908651553094387, "signal/accuracy_reward/centered_abs_mean": 0.164288330078125, "signal/accuracy_reward/group_std_mean": 0.21441585719585418, "signal/accuracy_reward/group_zero_std_frac": 0.403125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0821441650390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0821441650390625, "signal/advantage_abs_mean": 0.10403890758752823, "signal/advantage_pre_scale_abs_mean": 0.10403890758752823, "signal/advantage_pre_scale_std": 0.16093845069408416, "signal/advantage_std": 0.16093845069408416, "signal/brier_reward/centered_abs_mean": 0.18338664174079894, "signal/brier_reward/group_std_mean": 0.22795365154743194, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022923330217599867, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022923330217599867, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06989959329366684, "signal/confidence_uniqueness_reward/group_std_mean": 0.09106495976448059, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008737449161708355, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008737449161708355, "signal/format_reward/centered_abs_mean": 0.0210205078125, "signal/format_reward/group_std_mean": 0.03324367478489876, "signal/format_reward/group_zero_std_frac": 0.88125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01051025390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01051025390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014914550818502903, "signal/frontier_aurc_reward/group_std_mean": 0.0024216063786298035, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6697046632762066e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6697046632762066e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2453473687171936, "signal/frontier_coverage_1/group_std_mean": 0.3083998620510101, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_10/centered_abs_mean": 0.2453473687171936, "signal/frontier_coverage_10/group_std_mean": 0.3083998620510101, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_15/centered_abs_mean": 0.2453473687171936, "signal/frontier_coverage_15/group_std_mean": 0.3083998620510101, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_20/centered_abs_mean": 0.2453473687171936, "signal/frontier_coverage_20/group_std_mean": 0.3083998620510101, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_25/centered_abs_mean": 0.2453473687171936, "signal/frontier_coverage_25/group_std_mean": 0.3083998620510101, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_5/centered_abs_mean": 0.2453473687171936, "signal/frontier_coverage_5/group_std_mean": 0.3083998620510101, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004391717724502087, "signal/frontier_ece_reward/centered_abs_mean": 0.042443787306547166, "signal/frontier_ece_reward/group_std_mean": 0.05635495781898499, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005305473413318396, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005305473413318396, "step": 45 }, { "calibration/aurc": 0.403125271003182, "calibration/batch_distribution_entropy": 0.9391103501505299, "calibration/buffer_distribution_entropy": 0.875463691913424, "calibration/confidence_entropy": 0.5222249702517436, "calibration/coverage@0%": 0.002834008097165992, "calibration/coverage@1%": 0.002834008097165992, "calibration/coverage@10%": 0.002834008097165992, "calibration/coverage@15%": 0.023523663269579782, "calibration/coverage@20%": 0.04461407067364151, "calibration/coverage@25%": 0.0984996427720886, "calibration/coverage@30%": 0.19978814939292627, "calibration/coverage@5%": 0.002834008097165992, "calibration/ece": 0.10212839080221536, "calibration/mean_confidence": 0.4431201023375003, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0041015625, "completions/max_length": 1536.0, "completions/max_terminated_length": 918.6, "completions/mean_length": 149.158203125, "completions/mean_terminated_length": 143.44497680664062, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.16, "grad_norm": 0.07966148853302002, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 167945185.0, "reward": 0.9424496412277221, "reward_std": 0.14887651801109314, "rewards/accuracy_reward": 0.48271484375, "rewards/brier_reward": 0.7433403611183167, "rewards/confidence_uniqueness_reward": 0.8616537690162659, "rewards/format_reward": 0.978125, "rewards/frontier_aurc_reward": -0.0028730600606650114, "rewards/frontier_coverage_1": 0.08680228143930435, "rewards/frontier_coverage_10": 0.08680228143930435, "rewards/frontier_coverage_15": 0.08680228143930435, "rewards/frontier_coverage_20": 0.08680228143930435, "rewards/frontier_coverage_25": 0.08680228143930435, "rewards/frontier_coverage_5": 0.08680228143930435, "rewards/frontier_ece_reward": 0.01707436852157116, "signal/accuracy_reward/centered_abs_mean": 0.175335693359375, "signal/accuracy_reward/group_std_mean": 0.21820703744888306, "signal/accuracy_reward/group_zero_std_frac": 0.43125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0876678466796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0876678466796875, "signal/advantage_abs_mean": 0.11476020514965057, "signal/advantage_pre_scale_abs_mean": 0.11476020514965057, "signal/advantage_pre_scale_std": 0.17932912409305574, "signal/advantage_std": 0.17932912409305574, "signal/brier_reward/centered_abs_mean": 0.18417735695838927, "signal/brier_reward/group_std_mean": 0.2291133314371109, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02302216961979866, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02302216961979866, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07359530031681061, "signal/confidence_uniqueness_reward/group_std_mean": 0.10155714750289917, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009199412539601326, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009199412539601326, "signal/format_reward/centered_abs_mean": 0.03203125, "signal/format_reward/group_std_mean": 0.05196922719478607, "signal/format_reward/group_zero_std_frac": 0.809375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.001706640375778079, "signal/frontier_aurc_reward/group_std_mean": 0.002641899697482586, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0548862559953706e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0548862559953706e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.24131617248058318, "signal/frontier_coverage_1/group_std_mean": 0.30335493087768556, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_10/centered_abs_mean": 0.24131617248058318, "signal/frontier_coverage_10/group_std_mean": 0.30335493087768556, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_15/centered_abs_mean": 0.24131617248058318, "signal/frontier_coverage_15/group_std_mean": 0.30335493087768556, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_20/centered_abs_mean": 0.24131617248058318, "signal/frontier_coverage_20/group_std_mean": 0.30335493087768556, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_25/centered_abs_mean": 0.24131617248058318, "signal/frontier_coverage_25/group_std_mean": 0.30335493087768556, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_5/centered_abs_mean": 0.24131617248058318, "signal/frontier_coverage_5/group_std_mean": 0.30335493087768556, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004319559410214424, "signal/frontier_ece_reward/centered_abs_mean": 0.040847336500883104, "signal/frontier_ece_reward/group_std_mean": 0.05442367494106293, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005105917062610388, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005105917062610388, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6254440846908731, "eval_calibration/batch_distribution_entropy": 0.8821632107470848, "eval_calibration/buffer_distribution_entropy": 0.8901940690847239, "eval_calibration/confidence_entropy": 0.520183064421665, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.041666666666666664, "eval_calibration/coverage@25%": 0.041666666666666664, "eval_calibration/coverage@30%": 0.058333333333333334, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.28160651881720433, "eval_calibration/mean_confidence": 0.46711323924731185, "eval_completions/clipped_ratio": 0.004108297413793094, "eval_completions/max_length": 939.5, "eval_completions/max_terminated_length": 341.0, "eval_completions/mean_length": 156.55832290649414, "eval_completions/mean_terminated_length": 150.87627792358398, "eval_completions/min_length": 69.0, "eval_completions/min_terminated_length": 69.0, "eval_loss": 0.0, "eval_num_tokens": 167945185.0, "eval_reward": 0.8764741569757462, "eval_reward_std": 0.2708371505141258, "eval_rewards/accuracy_reward": 0.353515625, "eval_rewards/brier_reward": 0.752171978354454, "eval_rewards/confidence_uniqueness_reward": 0.7996502369642258, "eval_rewards/format_reward": 0.96875, "eval_rewards/frontier_aurc_reward": -0.003503879823256284, "eval_rewards/frontier_coverage_1": 0.18414541706442833, "eval_rewards/frontier_coverage_10": 0.18414541706442833, "eval_rewards/frontier_coverage_15": 0.18414541706442833, "eval_rewards/frontier_coverage_20": 0.18414541706442833, "eval_rewards/frontier_coverage_25": 0.18414541706442833, "eval_rewards/frontier_coverage_5": 0.18414541706442833, "eval_rewards/frontier_ece_reward": 0.01319264032645151, "eval_runtime": 37.2237, "eval_samples_per_second": 13.432, "eval_signal/accuracy_reward/centered_abs_mean": 0.4461669921875, "eval_signal/accuracy_reward/group_std_mean": 0.47892439365386963, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22308349609375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22308349609375, "eval_signal/advantage_abs_mean": 0.21810520812869072, "eval_signal/advantage_pre_scale_abs_mean": 0.21810520812869072, "eval_signal/advantage_pre_scale_std": 0.2705560587346554, "eval_signal/advantage_std": 0.2705560587346554, "eval_signal/brier_reward/centered_abs_mean": 0.21390501782298088, "eval_signal/brier_reward/group_std_mean": 0.2747611552476883, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02673812722787261, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02673812722787261, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1026211753487587, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15684263966977596, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012827646918594837, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012827646918594837, "eval_signal/format_reward/centered_abs_mean": 0.058837890625, "eval_signal/format_reward/group_std_mean": 0.13523541949689388, "eval_signal/format_reward/group_zero_std_frac": 0.375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0294189453125, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0294189453125, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002764371281955391, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0045166065683588386, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.948224341205787e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.948224341205787e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3214282989501953, "eval_signal/frontier_coverage_1/group_std_mean": 0.394027441740036, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3214282989501953, "eval_signal/frontier_coverage_10/group_std_mean": 0.394027441740036, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3214282989501953, "eval_signal/frontier_coverage_15/group_std_mean": 0.394027441740036, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3214282989501953, "eval_signal/frontier_coverage_20/group_std_mean": 0.394027441740036, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3214282989501953, "eval_signal/frontier_coverage_25/group_std_mean": 0.394027441740036, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3214282989501953, "eval_signal/frontier_coverage_5/group_std_mean": 0.394027441740036, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005753566394560039, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.049655829556286335, "eval_signal/frontier_ece_reward/group_std_mean": 0.07456529140472412, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006206978694535792, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006206978694535792, "eval_steps_per_second": 0.107, "step": 50 }, { "epoch": 0.16, "step": 50, "train_probe_calibration/aurc": 0.34953725528411633, "train_probe_calibration/batch_distribution_entropy": 0.9158039775330977, "train_probe_calibration/buffer_distribution_entropy": 0.891133958747135, "train_probe_calibration/confidence_entropy": 0.5133234399655757, "train_probe_calibration/coverage@0%": 0.11164314516129031, "train_probe_calibration/coverage@1%": 0.11164314516129031, "train_probe_calibration/coverage@10%": 0.11164314516129031, "train_probe_calibration/coverage@15%": 0.18220766129032256, "train_probe_calibration/coverage@20%": 0.2537802419354839, "train_probe_calibration/coverage@25%": 0.2850302419354839, "train_probe_calibration/coverage@30%": 0.3631552419354839, "train_probe_calibration/coverage@5%": 0.11164314516129031, "train_probe_calibration/ece": 0.1722202620967742, "train_probe_calibration/mean_confidence": 0.4648311491935484, "train_probe_completions/clipped_ratio": 0.008216594827586216, "train_probe_completions/max_length": 1455.25, "train_probe_completions/max_terminated_length": 755.0, "train_probe_completions/mean_length": 164.17631912231445, "train_probe_completions/mean_terminated_length": 152.7919807434082, "train_probe_completions/min_length": 71.0, "train_probe_completions/min_terminated_length": 71.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 167945185.0, "train_probe_reward": 0.9423353224992752, "train_probe_reward_std": 0.27149440348148346, "train_probe_rewards/accuracy_reward": 0.4921875, "train_probe_rewards/brier_reward": 0.7489275336265564, "train_probe_rewards/confidence_uniqueness_reward": 0.8245857506990433, "train_probe_rewards/format_reward": 0.974609375, "train_probe_rewards/frontier_aurc_reward": -0.0028590288711711764, "train_probe_rewards/frontier_coverage_1": 0.08961892500519753, "train_probe_rewards/frontier_coverage_10": 0.08961892500519753, "train_probe_rewards/frontier_coverage_15": 0.08961892500519753, "train_probe_rewards/frontier_coverage_20": 0.08961892500519753, "train_probe_rewards/frontier_coverage_25": 0.08961892500519753, "train_probe_rewards/frontier_coverage_5": 0.08961892500519753, "train_probe_rewards/frontier_ece_reward": 0.02139047277159989, "train_probe_runtime": 54.1297, "train_probe_samples_per_second": 9.237, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.489990234375, "train_probe_signal/accuracy_reward/group_std_mean": 0.5028149038553238, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2449951171875, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2449951171875, "train_probe_signal/advantage_abs_mean": 0.23267249390482903, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23267249390482903, "train_probe_signal/advantage_pre_scale_std": 0.27059199661016464, "train_probe_signal/advantage_std": 0.27059199661016464, "train_probe_signal/brier_reward/centered_abs_mean": 0.2169180065393448, "train_probe_signal/brier_reward/group_std_mean": 0.2719731330871582, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0271147508174181, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0271147508174181, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.08825866505503654, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.14233380556106567, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011032333131879568, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011032333131879568, "train_probe_signal/format_reward/centered_abs_mean": 0.0484619140625, "train_probe_signal/format_reward/group_std_mean": 0.1234525553882122, "train_probe_signal/format_reward/group_zero_std_frac": 0.375, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02423095703125, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.02423095703125, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002652477065566927, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0041865811217576265, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.747933689941419e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.747933689941419e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3308027759194374, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.424383707344532, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3308027759194374, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.424383707344532, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3308027759194374, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.424383707344532, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3308027759194374, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.424383707344532, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3308027759194374, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.424383707344532, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3308027759194374, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.424383707344532, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005921369651332498, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.051902798004448414, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.07279590144753456, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006487849750556052, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006487849750556052, "train_probe_steps_per_second": 0.074 }, { "calibration/aurc": 0.3712372844316617, "calibration/batch_distribution_entropy": 0.9609159777516597, "calibration/buffer_distribution_entropy": 0.8971900490580224, "calibration/confidence_entropy": 0.49239175054637546, "calibration/coverage@0%": 0.002510460251046025, "calibration/coverage@1%": 0.002510460251046025, "calibration/coverage@10%": 0.002510460251046025, "calibration/coverage@15%": 0.002510460251046025, "calibration/coverage@20%": 0.002510460251046025, "calibration/coverage@25%": 0.04713520670135028, "calibration/coverage@30%": 0.26879748917877694, "calibration/coverage@5%": 0.002510460251046025, "calibration/ece": 0.16573751140661525, "calibration/mean_confidence": 0.5205409540211041, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006640625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1219.2, "completions/mean_length": 160.66025390625, "completions/mean_terminated_length": 151.46548461914062, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.176, "grad_norm": 0.06872954219579697, "learning_rate": 1e-06, "loss": 0.0122, "num_tokens": 184827466.0, "reward": 0.9402350187301636, "reward_std": 0.14623880088329316, "rewards/accuracy_reward": 0.48759765625, "rewards/brier_reward": 0.7364905476570129, "rewards/confidence_uniqueness_reward": 0.8593945026397705, "rewards/format_reward": 0.9693359375, "rewards/frontier_aurc_reward": -0.0029026484582573174, "rewards/frontier_coverage_1": 0.09159794300794602, "rewards/frontier_coverage_10": 0.09159794300794602, "rewards/frontier_coverage_15": 0.09159794300794602, "rewards/frontier_coverage_20": 0.09159794300794602, "rewards/frontier_coverage_25": 0.09159794300794602, "rewards/frontier_coverage_5": 0.09159794300794602, "rewards/frontier_ece_reward": 0.019975333102047445, "signal/accuracy_reward/centered_abs_mean": 0.145709228515625, "signal/accuracy_reward/group_std_mean": 0.19187160730361938, "signal/accuracy_reward/group_zero_std_frac": 0.453125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0728546142578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0728546142578125, "signal/advantage_abs_mean": 0.1101900115609169, "signal/advantage_pre_scale_abs_mean": 0.1101900115609169, "signal/advantage_pre_scale_std": 0.17681396007537842, "signal/advantage_std": 0.17681396007537842, "signal/brier_reward/centered_abs_mean": 0.19099677503108978, "signal/brier_reward/group_std_mean": 0.23669040203094482, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023874596878886222, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023874596878886222, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07248903661966324, "signal/confidence_uniqueness_reward/group_std_mean": 0.09891549348831177, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009061129577457906, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009061129577457906, "signal/format_reward/centered_abs_mean": 0.0377197265625, "signal/format_reward/group_std_mean": 0.05772824138402939, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01885986328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01885986328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021433203713968397, "signal/frontier_aurc_reward/group_std_mean": 0.0032778474967926742, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8365434011211616e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8365434011211616e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22156096398830413, "signal/frontier_coverage_1/group_std_mean": 0.282322096824646, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_10/centered_abs_mean": 0.22156096398830413, "signal/frontier_coverage_10/group_std_mean": 0.282322096824646, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_15/centered_abs_mean": 0.22156096398830413, "signal/frontier_coverage_15/group_std_mean": 0.282322096824646, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_20/centered_abs_mean": 0.22156096398830413, "signal/frontier_coverage_20/group_std_mean": 0.282322096824646, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_25/centered_abs_mean": 0.22156096398830413, "signal/frontier_coverage_25/group_std_mean": 0.282322096824646, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_5/centered_abs_mean": 0.22156096398830413, "signal/frontier_coverage_5/group_std_mean": 0.282322096824646, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003965941350907087, "signal/frontier_ece_reward/centered_abs_mean": 0.04264579936861992, "signal/frontier_ece_reward/group_std_mean": 0.05547093003988266, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00533072492107749, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00533072492107749, "step": 55 }, { "calibration/aurc": 0.35738801713319707, "calibration/batch_distribution_entropy": 0.9222789410563648, "calibration/buffer_distribution_entropy": 0.9062810656530844, "calibration/confidence_entropy": 0.4314712566952025, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.004733727810650888, "calibration/coverage@15%": 0.010794333871256948, "calibration/coverage@20%": 0.08998625306317615, "calibration/coverage@25%": 0.2524840938888444, "calibration/coverage@30%": 0.3501611604120648, "calibration/coverage@5%": 0.0, "calibration/ece": 0.129347005564316, "calibration/mean_confidence": 0.5669145772975883, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0068359375, "completions/max_length": 1536.0, "completions/max_terminated_length": 990.2, "completions/mean_length": 165.06435546875, "completions/mean_terminated_length": 155.6231201171875, "completions/min_length": 49.4, "completions/min_terminated_length": 49.4, "epoch": 0.192, "grad_norm": 0.06620907038450241, "learning_rate": 1e-06, "loss": 0.013, "num_tokens": 201332541.0, "reward": 0.9339034557342529, "reward_std": 0.15482064783573152, "rewards/accuracy_reward": 0.4763671875, "rewards/brier_reward": 0.7338881492614746, "rewards/confidence_uniqueness_reward": 0.8472534418106079, "rewards/format_reward": 0.966796875, "rewards/frontier_aurc_reward": -0.0033207187429070474, "rewards/frontier_coverage_1": 0.11109301298856736, "rewards/frontier_coverage_10": 0.11109301298856736, "rewards/frontier_coverage_15": 0.11109301298856736, "rewards/frontier_coverage_20": 0.11109301298856736, "rewards/frontier_coverage_25": 0.11109301298856736, "rewards/frontier_coverage_5": 0.11109301298856736, "rewards/frontier_ece_reward": 0.022454247623682023, "signal/accuracy_reward/centered_abs_mean": 0.14830322265625, "signal/accuracy_reward/group_std_mean": 0.1960592031478882, "signal/accuracy_reward/group_zero_std_frac": 0.44375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074151611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.074151611328125, "signal/advantage_abs_mean": 0.11740224063396454, "signal/advantage_pre_scale_abs_mean": 0.11740224063396454, "signal/advantage_pre_scale_std": 0.19125163555145264, "signal/advantage_std": 0.19125163555145264, "signal/brier_reward/centered_abs_mean": 0.19765847623348237, "signal/brier_reward/group_std_mean": 0.24587923288345337, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024707309529185296, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024707309529185296, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08868281245231628, "signal/confidence_uniqueness_reward/group_std_mean": 0.11674559116363525, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011085351556539535, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011085351556539535, "signal/format_reward/centered_abs_mean": 0.04295654296875, "signal/format_reward/group_std_mean": 0.06320370435714721, "signal/format_reward/group_zero_std_frac": 0.790625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.021478271484375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.021478271484375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003104797238484025, "signal/frontier_aurc_reward/group_std_mean": 0.004736031871289015, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.557586846407503e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.557586846407503e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20398018062114714, "signal/frontier_coverage_1/group_std_mean": 0.265421861410141, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_10/centered_abs_mean": 0.20398018062114714, "signal/frontier_coverage_10/group_std_mean": 0.265421861410141, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_15/centered_abs_mean": 0.20398018062114714, "signal/frontier_coverage_15/group_std_mean": 0.265421861410141, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_20/centered_abs_mean": 0.20398018062114714, "signal/frontier_coverage_20/group_std_mean": 0.265421861410141, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_25/centered_abs_mean": 0.20398018062114714, "signal/frontier_coverage_25/group_std_mean": 0.265421861410141, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_5/centered_abs_mean": 0.20398018062114714, "signal/frontier_coverage_5/group_std_mean": 0.265421861410141, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036512451246380807, "signal/frontier_ece_reward/centered_abs_mean": 0.0457615964114666, "signal/frontier_ece_reward/group_std_mean": 0.05886110737919807, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005720199551433325, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005720199551433325, "step": 60 }, { "calibration/aurc": 0.29643063095441663, "calibration/batch_distribution_entropy": 0.9171947927143869, "calibration/buffer_distribution_entropy": 0.9100448664327612, "calibration/confidence_entropy": 0.4175287827982107, "calibration/coverage@0%": 0.014481409001956946, "calibration/coverage@1%": 0.014481409001956946, "calibration/coverage@10%": 0.09575864838103554, "calibration/coverage@15%": 0.2081677205074029, "calibration/coverage@20%": 0.33050935136204285, "calibration/coverage@25%": 0.44360224553076566, "calibration/coverage@30%": 0.5479605902693373, "calibration/coverage@5%": 0.01643835616438356, "calibration/ece": 0.1376342119353494, "calibration/mean_confidence": 0.5811524191443811, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0072265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1322.8, "completions/mean_length": 168.00712890625, "completions/mean_terminated_length": 158.03106689453125, "completions/min_length": 36.4, "completions/min_terminated_length": 36.4, "epoch": 0.208, "grad_norm": 0.31033796072006226, "learning_rate": 1e-06, "loss": 0.0181, "num_tokens": 218085158.0, "reward": 0.9355008006095886, "reward_std": 0.18306846916675568, "rewards/accuracy_reward": 0.4966796875, "rewards/brier_reward": 0.7259644269943237, "rewards/confidence_uniqueness_reward": 0.8441673517227173, "rewards/format_reward": 0.9564453125, "rewards/frontier_aurc_reward": -0.002846927708014846, "rewards/frontier_coverage_1": 0.09027891755104064, "rewards/frontier_coverage_10": 0.09027891755104064, "rewards/frontier_coverage_15": 0.09027891755104064, "rewards/frontier_coverage_20": 0.09027891755104064, "rewards/frontier_coverage_25": 0.09027891755104064, "rewards/frontier_coverage_5": 0.09027891755104064, "rewards/frontier_ece_reward": 0.02421446852385998, "signal/accuracy_reward/centered_abs_mean": 0.179248046875, "signal/accuracy_reward/group_std_mean": 0.22900831699371338, "signal/accuracy_reward/group_zero_std_frac": 0.375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0896240234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0896240234375, "signal/advantage_abs_mean": 0.1380708247423172, "signal/advantage_pre_scale_abs_mean": 0.1380708247423172, "signal/advantage_pre_scale_std": 0.22031235992908477, "signal/advantage_std": 0.22031235992908477, "signal/brier_reward/centered_abs_mean": 0.2126880943775177, "signal/brier_reward/group_std_mean": 0.2632372736930847, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026586011797189713, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.026586011797189713, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09807170182466507, "signal/confidence_uniqueness_reward/group_std_mean": 0.1343176171183586, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012258962728083134, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012258962728083134, "signal/format_reward/centered_abs_mean": 0.06129150390625, "signal/format_reward/group_std_mean": 0.09367451593279838, "signal/format_reward/group_zero_std_frac": 0.68125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.030645751953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.030645751953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029108581598848104, "signal/frontier_aurc_reward/group_std_mean": 0.004493788257241249, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2104357746429744e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2104357746429744e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21561312973499297, "signal/frontier_coverage_1/group_std_mean": 0.28178144097328184, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_10/centered_abs_mean": 0.21561312973499297, "signal/frontier_coverage_10/group_std_mean": 0.28178144097328184, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_15/centered_abs_mean": 0.21561312973499297, "signal/frontier_coverage_15/group_std_mean": 0.28178144097328184, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_20/centered_abs_mean": 0.21561312973499297, "signal/frontier_coverage_20/group_std_mean": 0.28178144097328184, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_25/centered_abs_mean": 0.21561312973499297, "signal/frontier_coverage_25/group_std_mean": 0.28178144097328184, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_5/centered_abs_mean": 0.21561312973499297, "signal/frontier_coverage_5/group_std_mean": 0.28178144097328184, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003859474789351225, "signal/frontier_ece_reward/centered_abs_mean": 0.043842590600252154, "signal/frontier_ece_reward/group_std_mean": 0.056413907557725906, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005480323825031519, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005480323825031519, "step": 65 }, { "calibration/aurc": 0.46571920158011276, "calibration/batch_distribution_entropy": 0.9178738567052317, "calibration/buffer_distribution_entropy": 0.9163612020518315, "calibration/confidence_entropy": 0.3947332396271469, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.027000000000000003, "calibration/coverage@20%": 0.04491176470588236, "calibration/coverage@25%": 0.17707901232241774, "calibration/coverage@30%": 0.25417977729208274, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19856830881468507, "calibration/mean_confidence": 0.40029018840683345, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.065234375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1464.2, "completions/mean_length": 242.76435546875, "completions/mean_terminated_length": 152.45449523925782, "completions/min_length": 36.2, "completions/min_terminated_length": 36.2, "epoch": 0.224, "grad_norm": 2.9691953659057617, "learning_rate": 1e-06, "loss": 0.0767, "num_tokens": 235724249.0, "reward": 0.6787481069564819, "reward_std": 0.34110564887523653, "rewards/accuracy_reward": 0.33427734375, "rewards/brier_reward": 0.5312727630138397, "rewards/confidence_uniqueness_reward": 0.6269549608230591, "rewards/format_reward": 0.7095703125, "rewards/frontier_aurc_reward": -0.0023390050046145916, "rewards/frontier_coverage_1": 0.09750215262174607, "rewards/frontier_coverage_10": 0.09750215262174607, "rewards/frontier_coverage_15": 0.09750215262174607, "rewards/frontier_coverage_20": 0.09750215262174607, "rewards/frontier_coverage_25": 0.09750215262174607, "rewards/frontier_coverage_5": 0.09750215262174607, "rewards/frontier_ece_reward": 0.012927726469933986, "signal/accuracy_reward/centered_abs_mean": 0.188677978515625, "signal/accuracy_reward/group_std_mean": 0.23931180834770202, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0943389892578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0943389892578125, "signal/advantage_abs_mean": 0.2798399984836578, "signal/advantage_pre_scale_abs_mean": 0.2798399984836578, "signal/advantage_pre_scale_std": 0.3601413905620575, "signal/advantage_std": 0.3601413905620575, "signal/brier_reward/centered_abs_mean": 0.29879134297370913, "signal/brier_reward/group_std_mean": 0.3516451418399811, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03734891787171364, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03734891787171364, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2545173615217209, "signal/confidence_uniqueness_reward/group_std_mean": 0.3105557501316071, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03181467019021511, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03181467019021511, "signal/format_reward/centered_abs_mean": 0.27607421875, "signal/format_reward/group_std_mean": 0.3350002527236938, "signal/format_reward/group_zero_std_frac": 0.159375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.138037109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.138037109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002703424310311675, "signal/frontier_aurc_reward/group_std_mean": 0.004504935536533594, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.839129323954694e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.839129323954694e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2409254640340805, "signal/frontier_coverage_1/group_std_mean": 0.3174335896968842, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_10/centered_abs_mean": 0.2409254640340805, "signal/frontier_coverage_10/group_std_mean": 0.3174335896968842, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_15/centered_abs_mean": 0.2409254640340805, "signal/frontier_coverage_15/group_std_mean": 0.3174335896968842, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_20/centered_abs_mean": 0.2409254640340805, "signal/frontier_coverage_20/group_std_mean": 0.3174335896968842, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_25/centered_abs_mean": 0.2409254640340805, "signal/frontier_coverage_25/group_std_mean": 0.3174335896968842, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_5/centered_abs_mean": 0.2409254640340805, "signal/frontier_coverage_5/group_std_mean": 0.3174335896968842, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004312565550208092, "signal/frontier_ece_reward/centered_abs_mean": 0.0325216319411993, "signal/frontier_ece_reward/group_std_mean": 0.044718362390995026, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004065203992649913, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004065203992649913, "step": 70 }, { "calibration/aurc": 0.6752260460289098, "calibration/batch_distribution_entropy": 0.8585007362093007, "calibration/buffer_distribution_entropy": 0.921174766327918, "calibration/confidence_entropy": 0.3302784425160673, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3135163470474769, "calibration/mean_confidence": 0.4402562737874036, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.762109375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1532.2, "completions/mean_length": 1249.8326171875, "completions/mean_terminated_length": 387.5689727783203, "completions/min_length": 3.6, "completions/min_terminated_length": 3.6, "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0177, "num_tokens": 263774215.0, "reward": 0.0329528481233865, "reward_std": 0.07450879570096731, "rewards/accuracy_reward": 0.01083984375, "rewards/brier_reward": 0.02677628120291047, "rewards/confidence_uniqueness_reward": 0.03159494288265705, "rewards/format_reward": 0.03857421875, "rewards/frontier_aurc_reward": -0.0002577310428023338, "rewards/frontier_coverage_1": 0.009148352436022833, "rewards/frontier_coverage_10": 0.009148352436022833, "rewards/frontier_coverage_15": 0.009148352436022833, "rewards/frontier_coverage_20": 0.009148352436022833, "rewards/frontier_coverage_25": 0.009148352436022833, "rewards/frontier_coverage_5": 0.009148352436022833, "rewards/frontier_ece_reward": -0.00022805376793257892, "signal/accuracy_reward/centered_abs_mean": 0.017510986328125, "signal/accuracy_reward/group_std_mean": 0.02862224280834198, "signal/accuracy_reward/group_zero_std_frac": 0.896875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0087554931640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0087554931640625, "signal/advantage_abs_mean": 0.04833462685346603, "signal/advantage_pre_scale_abs_mean": 0.04833462685346603, "signal/advantage_pre_scale_std": 0.10756354965269566, "signal/advantage_std": 0.10756354965269566, "signal/brier_reward/centered_abs_mean": 0.04109984996030107, "signal/brier_reward/group_std_mean": 0.0659692483022809, "signal/brier_reward/group_zero_std_frac": 0.71875, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005137481245037634, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.005137481245037634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04622841775417328, "signal/confidence_uniqueness_reward/group_std_mean": 0.07011332884430885, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.7125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00577855221927166, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00577855221927166, "signal/format_reward/centered_abs_mean": 0.056903076171875, "signal/format_reward/group_std_mean": 0.0884034713730216, "signal/format_reward/group_zero_std_frac": 0.69375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0284515380859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0284515380859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0004303819587221369, "signal/frontier_aurc_reward/group_std_mean": 0.0009567889268510043, "signal/frontier_aurc_reward/group_zero_std_frac": 0.69375, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.70383680901432e-06, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.70383680901432e-06, "signal/frontier_coverage_1/centered_abs_mean": 0.026687611715169625, "signal/frontier_coverage_1/group_std_mean": 0.04987622057087719, "signal/frontier_coverage_1/group_zero_std_frac": 0.7125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_10/centered_abs_mean": 0.026687611715169625, "signal/frontier_coverage_10/group_std_mean": 0.04987622057087719, "signal/frontier_coverage_10/group_zero_std_frac": 0.7125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_15/centered_abs_mean": 0.026687611715169625, "signal/frontier_coverage_15/group_std_mean": 0.04987622057087719, "signal/frontier_coverage_15/group_zero_std_frac": 0.7125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_20/centered_abs_mean": 0.026687611715169625, "signal/frontier_coverage_20/group_std_mean": 0.04987622057087719, "signal/frontier_coverage_20/group_zero_std_frac": 0.7125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_25/centered_abs_mean": 0.026687611715169625, "signal/frontier_coverage_25/group_std_mean": 0.04987622057087719, "signal/frontier_coverage_25/group_zero_std_frac": 0.7125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_5/centered_abs_mean": 0.026687611715169625, "signal/frontier_coverage_5/group_std_mean": 0.04987622057087719, "signal/frontier_coverage_5/group_zero_std_frac": 0.7125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00047770824676263146, "signal/frontier_ece_reward/centered_abs_mean": 0.00249036728637293, "signal/frontier_ece_reward/group_std_mean": 0.005515742604620755, "signal/frontier_ece_reward/group_zero_std_frac": 0.69375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00031129591079661625, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00031129591079661625, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.88017578125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1533.2, "completions/mean_length": 1385.08759765625, "completions/mean_terminated_length": 275.3055450439453, "completions/min_length": 2.2, "completions/min_terminated_length": 2.2, "epoch": 0.256, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 293012328.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.96201171875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1481.6, "completions/mean_length": 1489.7912109375, "completions/mean_terminated_length": 334.21091918945314, "completions/min_length": 2.8, "completions/min_terminated_length": 2.8, "epoch": 0.272, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 323233486.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.98466796875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1513.6, "completions/mean_length": 1520.5732421875, "completions/mean_terminated_length": 536.7734497070312, "completions/min_length": 9.4, "completions/min_terminated_length": 9.4, "epoch": 0.288, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 353762332.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.98818359375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1522.99775390625, "completions/mean_terminated_length": 435.09649353027345, "completions/min_length": 8.4, "completions/min_terminated_length": 8.4, "epoch": 0.304, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 384287781.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9904296875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1525.1625, "completions/mean_terminated_length": 416.81488647460935, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 414994149.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 100 }, { "epoch": 0.32, "eval_completions/clipped_ratio": 0.998046875, "eval_completions/max_length": 1536.0, "eval_completions/max_terminated_length": 231.5, "eval_completions/mean_length": 1534.80859375, "eval_completions/mean_terminated_length": 231.5, "eval_completions/min_length": 1383.5, "eval_completions/min_terminated_length": 231.5, "eval_loss": 0.0, "eval_num_tokens": 414994149.0, "eval_reward": 0.0, "eval_reward_std": 0.0, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.0, "eval_rewards/confidence_uniqueness_reward": 0.0, "eval_rewards/format_reward": 0.0, "eval_rewards/frontier_aurc_reward": 0.0, "eval_rewards/frontier_coverage_1": 0.0, "eval_rewards/frontier_coverage_10": 0.0, "eval_rewards/frontier_coverage_15": 0.0, "eval_rewards/frontier_coverage_20": 0.0, "eval_rewards/frontier_coverage_25": 0.0, "eval_rewards/frontier_coverage_5": 0.0, "eval_rewards/frontier_ece_reward": 0.0, "eval_runtime": 74.8012, "eval_samples_per_second": 6.684, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.0, "eval_signal/advantage_pre_scale_abs_mean": 0.0, "eval_signal/advantage_pre_scale_std": 0.0, "eval_signal/advantage_std": 0.0, "eval_signal/brier_reward/centered_abs_mean": 0.0, "eval_signal/brier_reward/group_std_mean": 0.0, "eval_signal/brier_reward/group_zero_std_frac": 1.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/group_std_mean": 0.0, "eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/group_std_mean": 0.0, "eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/group_std_mean": 0.0, "eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/group_std_mean": 0.0, "eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/group_std_mean": 0.0, "eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/group_std_mean": 0.0, "eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/group_std_mean": 0.0, "eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.053, "step": 100 }, { "epoch": 0.32, "step": 100, "train_probe_completions/clipped_ratio": 0.994140625, "train_probe_completions/max_length": 1536.0, "train_probe_completions/max_terminated_length": 567.5, "train_probe_completions/mean_length": 1531.43359375, "train_probe_completions/mean_terminated_length": 567.5, "train_probe_completions/min_length": 951.5, "train_probe_completions/min_terminated_length": 567.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 414994149.0, "train_probe_reward": 0.0, "train_probe_reward_std": 0.0, "train_probe_rewards/accuracy_reward": 0.0, "train_probe_rewards/brier_reward": 0.0, "train_probe_rewards/confidence_uniqueness_reward": 0.0, "train_probe_rewards/format_reward": 0.0, "train_probe_rewards/frontier_aurc_reward": 0.0, "train_probe_rewards/frontier_coverage_1": 0.0, "train_probe_rewards/frontier_coverage_10": 0.0, "train_probe_rewards/frontier_coverage_15": 0.0, "train_probe_rewards/frontier_coverage_20": 0.0, "train_probe_rewards/frontier_coverage_25": 0.0, "train_probe_rewards/frontier_coverage_5": 0.0, "train_probe_rewards/frontier_ece_reward": 0.0, "train_probe_runtime": 73.546, "train_probe_samples_per_second": 6.798, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/group_std_mean": 0.0, "train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/advantage_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_std": 0.0, "train_probe_signal/advantage_std": 0.0, "train_probe_signal/brier_reward/centered_abs_mean": 0.0, "train_probe_signal/brier_reward/group_std_mean": 0.0, "train_probe_signal/brier_reward/group_zero_std_frac": 1.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "train_probe_steps_per_second": 0.054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99091796875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1526.96376953125, "completions/mean_terminated_length": 541.3440856933594, "completions/min_length": 21.6, "completions/min_terminated_length": 21.6, "epoch": 0.336, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 445352690.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.991015625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1526.5474609375, "completions/mean_terminated_length": 485.99510498046874, "completions/min_length": 24.6, "completions/min_terminated_length": 24.6, "epoch": 0.352, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 476244952.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99072265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1438.6, "completions/mean_length": 1526.39052734375, "completions/mean_terminated_length": 499.4287414550781, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.368, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 506940663.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9912109375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1377.2, "completions/mean_length": 1526.468359375, "completions/mean_terminated_length": 455.91710205078124, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 0.384, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 537428211.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99150390625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1416.2, "completions/mean_length": 1527.345703125, "completions/mean_terminated_length": 484.37660522460936, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 568104679.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.98994140625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1400.4, "completions/mean_length": 1525.31591796875, "completions/mean_terminated_length": 484.96845703125, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.416, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 598605098.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.990234375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1392.6, "completions/mean_length": 1525.57548828125, "completions/mean_terminated_length": 465.9883239746094, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.432, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 629241327.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99169921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1527.14140625, "completions/mean_terminated_length": 479.5002502441406, "completions/min_length": 21.4, "completions/min_terminated_length": 21.4, "epoch": 0.448, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 659832055.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9908203125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1349.6, "completions/mean_length": 1526.08798828125, "completions/mean_terminated_length": 470.441455078125, "completions/min_length": 30.8, "completions/min_terminated_length": 30.8, "epoch": 0.464, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 690630012.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.992578125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1409.6, "completions/mean_length": 1527.8091796875, "completions/mean_terminated_length": 447.0835357666016, "completions/min_length": 19.8, "completions/min_terminated_length": 19.8, "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 721322810.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 150 }, { "epoch": 0.48, "eval_completions/clipped_ratio": 0.9876751077586207, "eval_completions/max_length": 1536.0, "eval_completions/max_terminated_length": 476.0, "eval_completions/mean_length": 1521.2329711914062, "eval_completions/mean_terminated_length": 361.83333587646484, "eval_completions/min_length": 303.0, "eval_completions/min_terminated_length": 303.0, "eval_loss": 0.0, "eval_num_tokens": 721322810.0, "eval_reward": 0.0, "eval_reward_std": 0.0, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.0, "eval_rewards/confidence_uniqueness_reward": 0.0, "eval_rewards/format_reward": 0.0, "eval_rewards/frontier_aurc_reward": 0.0, "eval_rewards/frontier_coverage_1": 0.0, "eval_rewards/frontier_coverage_10": 0.0, "eval_rewards/frontier_coverage_15": 0.0, "eval_rewards/frontier_coverage_20": 0.0, "eval_rewards/frontier_coverage_25": 0.0, "eval_rewards/frontier_coverage_5": 0.0, "eval_rewards/frontier_ece_reward": 0.0, "eval_runtime": 74.8404, "eval_samples_per_second": 6.681, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.0, "eval_signal/advantage_pre_scale_abs_mean": 0.0, "eval_signal/advantage_pre_scale_std": 0.0, "eval_signal/advantage_std": 0.0, "eval_signal/brier_reward/centered_abs_mean": 0.0, "eval_signal/brier_reward/group_std_mean": 0.0, "eval_signal/brier_reward/group_zero_std_frac": 1.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/group_std_mean": 0.0, "eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/group_std_mean": 0.0, "eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/group_std_mean": 0.0, "eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/group_std_mean": 0.0, "eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/group_std_mean": 0.0, "eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/group_std_mean": 0.0, "eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/group_std_mean": 0.0, "eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.053, "step": 150 }, { "epoch": 0.48, "step": 150, "train_probe_completions/clipped_ratio": 0.994140625, "train_probe_completions/max_length": 1536.0, "train_probe_completions/max_terminated_length": 483.25, "train_probe_completions/mean_length": 1532.205078125, "train_probe_completions/mean_terminated_length": 443.875, "train_probe_completions/min_length": 1172.5, "train_probe_completions/min_terminated_length": 404.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 721322810.0, "train_probe_reward": 0.0, "train_probe_reward_std": 0.0, "train_probe_rewards/accuracy_reward": 0.0, "train_probe_rewards/brier_reward": 0.0, "train_probe_rewards/confidence_uniqueness_reward": 0.0, "train_probe_rewards/format_reward": 0.0, "train_probe_rewards/frontier_aurc_reward": 0.0, "train_probe_rewards/frontier_coverage_1": 0.0, "train_probe_rewards/frontier_coverage_10": 0.0, "train_probe_rewards/frontier_coverage_15": 0.0, "train_probe_rewards/frontier_coverage_20": 0.0, "train_probe_rewards/frontier_coverage_25": 0.0, "train_probe_rewards/frontier_coverage_5": 0.0, "train_probe_rewards/frontier_ece_reward": 0.0, "train_probe_runtime": 72.8578, "train_probe_samples_per_second": 6.863, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/group_std_mean": 0.0, "train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/advantage_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_std": 0.0, "train_probe_signal/advantage_std": 0.0, "train_probe_signal/brier_reward/centered_abs_mean": 0.0, "train_probe_signal/brier_reward/group_std_mean": 0.0, "train_probe_signal/brier_reward/group_zero_std_frac": 1.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "train_probe_steps_per_second": 0.055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99287109375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1102.8, "completions/mean_length": 1527.67587890625, "completions/mean_terminated_length": 357.08160400390625, "completions/min_length": 28.2, "completions/min_terminated_length": 28.2, "epoch": 0.496, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 752274051.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.992578125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1313.8, "completions/mean_length": 1528.16044921875, "completions/mean_terminated_length": 473.29002075195314, "completions/min_length": 33.6, "completions/min_terminated_length": 33.6, "epoch": 0.512, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 783068078.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.990625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1420.2, "completions/mean_length": 1525.24150390625, "completions/mean_terminated_length": 396.59959106445314, "completions/min_length": 22.6, "completions/min_terminated_length": 22.6, "epoch": 0.528, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 813716087.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1404.4, "completions/mean_length": 1527.6833984375, "completions/mean_terminated_length": 487.17767944335935, "completions/min_length": 21.4, "completions/min_terminated_length": 21.4, "epoch": 0.544, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 844523149.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9919921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1442.6, "completions/mean_length": 1527.085546875, "completions/mean_terminated_length": 423.56866455078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 874981913.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99130859375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 1526.4693359375, "completions/mean_terminated_length": 425.4572448730469, "completions/min_length": 17.8, "completions/min_terminated_length": 17.8, "epoch": 0.576, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 905799583.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99072265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1526.5130859375, "completions/mean_terminated_length": 515.5063232421875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.592, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 936598789.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99072265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1396.6, "completions/mean_length": 1525.85185546875, "completions/mean_terminated_length": 447.1800231933594, "completions/min_length": 31.2, "completions/min_terminated_length": 31.2, "epoch": 0.608, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 967223000.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99111328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1440.8, "completions/mean_length": 1527.06923828125, "completions/mean_terminated_length": 509.2791809082031, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 0.624, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 998204093.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99365234375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1235.8, "completions/mean_length": 1528.4357421875, "completions/mean_terminated_length": 320.70795288085935, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1029197963.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 200 }, { "epoch": 0.64, "eval_completions/clipped_ratio": 0.9900323275862069, "eval_completions/max_length": 1536.0, "eval_completions/max_terminated_length": 315.75, "eval_completions/mean_length": 1524.6064758300781, "eval_completions/mean_terminated_length": 265.1666717529297, "eval_completions/min_length": 557.0, "eval_completions/min_terminated_length": 173.0, "eval_loss": 0.0, "eval_num_tokens": 1029197963.0, "eval_reward": 0.0, "eval_reward_std": 0.0, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.0, "eval_rewards/confidence_uniqueness_reward": 0.0, "eval_rewards/format_reward": 0.0, "eval_rewards/frontier_aurc_reward": 0.0, "eval_rewards/frontier_coverage_1": 0.0, "eval_rewards/frontier_coverage_10": 0.0, "eval_rewards/frontier_coverage_15": 0.0, "eval_rewards/frontier_coverage_20": 0.0, "eval_rewards/frontier_coverage_25": 0.0, "eval_rewards/frontier_coverage_5": 0.0, "eval_rewards/frontier_ece_reward": 0.0, "eval_runtime": 74.0994, "eval_samples_per_second": 6.748, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.0, "eval_signal/advantage_pre_scale_abs_mean": 0.0, "eval_signal/advantage_pre_scale_std": 0.0, "eval_signal/advantage_std": 0.0, "eval_signal/brier_reward/centered_abs_mean": 0.0, "eval_signal/brier_reward/group_std_mean": 0.0, "eval_signal/brier_reward/group_zero_std_frac": 1.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/group_std_mean": 0.0, "eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/group_std_mean": 0.0, "eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/group_std_mean": 0.0, "eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/group_std_mean": 0.0, "eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/group_std_mean": 0.0, "eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/group_std_mean": 0.0, "eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/group_std_mean": 0.0, "eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.054, "step": 200 }, { "epoch": 0.64, "step": 200, "train_probe_completions/clipped_ratio": 0.9900323275862069, "train_probe_completions/max_length": 1536.0, "train_probe_completions/max_terminated_length": 444.75, "train_probe_completions/mean_length": 1524.4424743652344, "train_probe_completions/mean_terminated_length": 341.5, "train_probe_completions/min_length": 238.25, "train_probe_completions/min_terminated_length": 238.25, "train_probe_loss": 0.0, "train_probe_num_tokens": 1029197963.0, "train_probe_reward": 0.0, "train_probe_reward_std": 0.0, "train_probe_rewards/accuracy_reward": 0.0, "train_probe_rewards/brier_reward": 0.0, "train_probe_rewards/confidence_uniqueness_reward": 0.0, "train_probe_rewards/format_reward": 0.0, "train_probe_rewards/frontier_aurc_reward": 0.0, "train_probe_rewards/frontier_coverage_1": 0.0, "train_probe_rewards/frontier_coverage_10": 0.0, "train_probe_rewards/frontier_coverage_15": 0.0, "train_probe_rewards/frontier_coverage_20": 0.0, "train_probe_rewards/frontier_coverage_25": 0.0, "train_probe_rewards/frontier_coverage_5": 0.0, "train_probe_rewards/frontier_ece_reward": 0.0, "train_probe_runtime": 70.6463, "train_probe_samples_per_second": 7.078, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/group_std_mean": 0.0, "train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/advantage_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_std": 0.0, "train_probe_signal/advantage_std": 0.0, "train_probe_signal/brier_reward/centered_abs_mean": 0.0, "train_probe_signal/brier_reward/group_std_mean": 0.0, "train_probe_signal/brier_reward/group_zero_std_frac": 1.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "train_probe_steps_per_second": 0.057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99091796875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1409.4, "completions/mean_length": 1526.26650390625, "completions/mean_terminated_length": 446.5516021728516, "completions/min_length": 15.4, "completions/min_terminated_length": 15.4, "epoch": 0.656, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1059683476.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99345703125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1140.0, "completions/mean_length": 1528.49189453125, "completions/mean_terminated_length": 370.8636016845703, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.672, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1090248673.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99228515625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1345.6, "completions/mean_length": 1527.9634765625, "completions/mean_terminated_length": 482.64027709960936, "completions/min_length": 23.2, "completions/min_terminated_length": 23.2, "epoch": 0.688, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1120848939.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99169921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1463.8, "completions/mean_length": 1527.13515625, "completions/mean_terminated_length": 466.1702087402344, "completions/min_length": 21.6, "completions/min_terminated_length": 21.6, "epoch": 0.704, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1151352947.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9904296875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1260.2, "completions/mean_length": 1525.028515625, "completions/mean_terminated_length": 393.17090759277346, "completions/min_length": 18.8, "completions/min_terminated_length": 18.8, "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1181979095.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.992578125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1274.4, "completions/mean_length": 1527.6798828125, "completions/mean_terminated_length": 406.5520385742187, "completions/min_length": 25.2, "completions/min_terminated_length": 25.2, "epoch": 0.736, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1212562121.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99248046875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1376.4, "completions/mean_length": 1527.54853515625, "completions/mean_terminated_length": 413.2706298828125, "completions/min_length": 26.2, "completions/min_terminated_length": 26.2, "epoch": 0.752, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1243431418.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.991015625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1362.2, "completions/mean_length": 1525.55625, "completions/mean_terminated_length": 372.6120971679687, "completions/min_length": 14.2, "completions/min_terminated_length": 14.2, "epoch": 0.768, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1273985818.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99306640625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1481.8, "completions/mean_length": 1529.205078125, "completions/mean_terminated_length": 549.9662841796875, "completions/min_length": 33.8, "completions/min_terminated_length": 33.8, "epoch": 0.784, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1304819246.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99208984375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1298.6, "completions/mean_length": 1526.48974609375, "completions/mean_terminated_length": 337.9350891113281, "completions/min_length": 26.8, "completions/min_terminated_length": 26.8, "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1335461061.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 250 }, { "epoch": 0.8, "eval_completions/clipped_ratio": 0.990234375, "eval_completions/max_length": 1536.0, "eval_completions/max_terminated_length": 590.75, "eval_completions/mean_length": 1527.345703125, "eval_completions/mean_terminated_length": 491.875, "eval_completions/min_length": 777.0, "eval_completions/min_terminated_length": 393.0, "eval_loss": 0.0, "eval_num_tokens": 1335461061.0, "eval_reward": 0.0, "eval_reward_std": 0.0, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.0, "eval_rewards/confidence_uniqueness_reward": 0.0, "eval_rewards/format_reward": 0.0, "eval_rewards/frontier_aurc_reward": 0.0, "eval_rewards/frontier_coverage_1": 0.0, "eval_rewards/frontier_coverage_10": 0.0, "eval_rewards/frontier_coverage_15": 0.0, "eval_rewards/frontier_coverage_20": 0.0, "eval_rewards/frontier_coverage_25": 0.0, "eval_rewards/frontier_coverage_5": 0.0, "eval_rewards/frontier_ece_reward": 0.0, "eval_runtime": 73.9291, "eval_samples_per_second": 6.763, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.0, "eval_signal/advantage_pre_scale_abs_mean": 0.0, "eval_signal/advantage_pre_scale_std": 0.0, "eval_signal/advantage_std": 0.0, "eval_signal/brier_reward/centered_abs_mean": 0.0, "eval_signal/brier_reward/group_std_mean": 0.0, "eval_signal/brier_reward/group_zero_std_frac": 1.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/group_std_mean": 0.0, "eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/group_std_mean": 0.0, "eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/group_std_mean": 0.0, "eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/group_std_mean": 0.0, "eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/group_std_mean": 0.0, "eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/group_std_mean": 0.0, "eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/group_std_mean": 0.0, "eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.054, "step": 250 }, { "epoch": 0.8, "step": 250, "train_probe_completions/clipped_ratio": 0.9861260775862069, "train_probe_completions/max_length": 1536.0, "train_probe_completions/max_terminated_length": 655.75, "train_probe_completions/mean_length": 1522.4951477050781, "train_probe_completions/mean_terminated_length": 420.75000762939453, "train_probe_completions/min_length": 643.0, "train_probe_completions/min_terminated_length": 259.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 1335461061.0, "train_probe_reward": 0.0, "train_probe_reward_std": 0.0, "train_probe_rewards/accuracy_reward": 0.0, "train_probe_rewards/brier_reward": 0.0, "train_probe_rewards/confidence_uniqueness_reward": 0.0, "train_probe_rewards/format_reward": 0.0, "train_probe_rewards/frontier_aurc_reward": 0.0, "train_probe_rewards/frontier_coverage_1": 0.0, "train_probe_rewards/frontier_coverage_10": 0.0, "train_probe_rewards/frontier_coverage_15": 0.0, "train_probe_rewards/frontier_coverage_20": 0.0, "train_probe_rewards/frontier_coverage_25": 0.0, "train_probe_rewards/frontier_coverage_5": 0.0, "train_probe_rewards/frontier_ece_reward": 0.0, "train_probe_runtime": 73.7012, "train_probe_samples_per_second": 6.784, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/group_std_mean": 0.0, "train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/advantage_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_std": 0.0, "train_probe_signal/advantage_std": 0.0, "train_probe_signal/brier_reward/centered_abs_mean": 0.0, "train_probe_signal/brier_reward/group_std_mean": 0.0, "train_probe_signal/brier_reward/group_zero_std_frac": 1.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "train_probe_steps_per_second": 0.054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99208984375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1527.38896484375, "completions/mean_terminated_length": 444.1418090820313, "completions/min_length": 21.8, "completions/min_terminated_length": 21.8, "epoch": 0.816, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1366200692.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99091796875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1425.6, "completions/mean_length": 1526.38564453125, "completions/mean_terminated_length": 473.8236938476563, "completions/min_length": 20.6, "completions/min_terminated_length": 20.6, "epoch": 0.832, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1396839233.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99111328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1394.4, "completions/mean_length": 1525.48974609375, "completions/mean_terminated_length": 345.88324584960935, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.848, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1427474616.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99072265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1525.17509765625, "completions/mean_terminated_length": 365.7697448730469, "completions/min_length": 25.4, "completions/min_terminated_length": 25.4, "epoch": 0.864, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1458079225.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99296875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1370.4, "completions/mean_length": 1528.1609375, "completions/mean_terminated_length": 439.784326171875, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1488874665.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.991015625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1526.5763671875, "completions/mean_terminated_length": 484.0293884277344, "completions/min_length": 18.6, "completions/min_terminated_length": 18.6, "epoch": 0.896, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1519617655.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9912109375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1358.6, "completions/mean_length": 1526.38759765625, "completions/mean_terminated_length": 452.73790283203124, "completions/min_length": 30.6, "completions/min_terminated_length": 30.6, "epoch": 0.912, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1550299160.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1528.1560546875, "completions/mean_terminated_length": 532.4930725097656, "completions/min_length": 28.4, "completions/min_terminated_length": 28.4, "epoch": 0.928, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1580974294.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9904296875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1427.2, "completions/mean_length": 1525.52470703125, "completions/mean_terminated_length": 438.5868408203125, "completions/min_length": 19.4, "completions/min_terminated_length": 19.4, "epoch": 0.944, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1611571091.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.98994140625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1269.6, "completions/mean_length": 1524.29560546875, "completions/mean_terminated_length": 369.2761505126953, "completions/min_length": 7.4, "completions/min_terminated_length": 7.4, "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1642120198.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 300 }, { "epoch": 0.96, "eval_completions/clipped_ratio": 0.9878771551724138, "eval_completions/max_length": 1536.0, "eval_completions/max_terminated_length": 676.0, "eval_completions/mean_length": 1527.0393981933594, "eval_completions/mean_terminated_length": 608.4583435058594, "eval_completions/min_length": 946.0, "eval_completions/min_terminated_length": 562.0, "eval_loss": 0.0, "eval_num_tokens": 1642120198.0, "eval_reward": 0.0, "eval_reward_std": 0.0, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.0, "eval_rewards/confidence_uniqueness_reward": 0.0, "eval_rewards/format_reward": 0.0, "eval_rewards/frontier_aurc_reward": 0.0, "eval_rewards/frontier_coverage_1": 0.0, "eval_rewards/frontier_coverage_10": 0.0, "eval_rewards/frontier_coverage_15": 0.0, "eval_rewards/frontier_coverage_20": 0.0, "eval_rewards/frontier_coverage_25": 0.0, "eval_rewards/frontier_coverage_5": 0.0, "eval_rewards/frontier_ece_reward": 0.0, "eval_runtime": 76.0588, "eval_samples_per_second": 6.574, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.0, "eval_signal/advantage_pre_scale_abs_mean": 0.0, "eval_signal/advantage_pre_scale_std": 0.0, "eval_signal/advantage_std": 0.0, "eval_signal/brier_reward/centered_abs_mean": 0.0, "eval_signal/brier_reward/group_std_mean": 0.0, "eval_signal/brier_reward/group_zero_std_frac": 1.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/group_std_mean": 0.0, "eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/group_std_mean": 0.0, "eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/group_std_mean": 0.0, "eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/group_std_mean": 0.0, "eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/group_std_mean": 0.0, "eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/group_std_mean": 0.0, "eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/group_std_mean": 0.0, "eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.053, "step": 300 }, { "epoch": 0.96, "step": 300, "train_probe_completions/clipped_ratio": 0.9978448275862069, "train_probe_completions/max_length": 1536.0, "train_probe_completions/max_terminated_length": 18.25, "train_probe_completions/mean_length": 1532.8469848632812, "train_probe_completions/mean_terminated_length": 18.25, "train_probe_completions/min_length": 1170.25, "train_probe_completions/min_terminated_length": 18.25, "train_probe_loss": 0.0, "train_probe_num_tokens": 1642120198.0, "train_probe_reward": 0.0, "train_probe_reward_std": 0.0, "train_probe_rewards/accuracy_reward": 0.0, "train_probe_rewards/brier_reward": 0.0, "train_probe_rewards/confidence_uniqueness_reward": 0.0, "train_probe_rewards/format_reward": 0.0, "train_probe_rewards/frontier_aurc_reward": 0.0, "train_probe_rewards/frontier_coverage_1": 0.0, "train_probe_rewards/frontier_coverage_10": 0.0, "train_probe_rewards/frontier_coverage_15": 0.0, "train_probe_rewards/frontier_coverage_20": 0.0, "train_probe_rewards/frontier_coverage_25": 0.0, "train_probe_rewards/frontier_coverage_5": 0.0, "train_probe_rewards/frontier_ece_reward": 0.0, "train_probe_runtime": 72.6093, "train_probe_samples_per_second": 6.886, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/group_std_mean": 0.0, "train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/advantage_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_abs_mean": 0.0, "train_probe_signal/advantage_pre_scale_std": 0.0, "train_probe_signal/advantage_std": 0.0, "train_probe_signal/brier_reward/centered_abs_mean": 0.0, "train_probe_signal/brier_reward/group_std_mean": 0.0, "train_probe_signal/brier_reward/group_zero_std_frac": 1.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.0, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.0, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "train_probe_steps_per_second": 0.055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99326171875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1463.8, "completions/mean_length": 1529.2005859375, "completions/mean_terminated_length": 520.1449768066407, "completions/min_length": 33.4, "completions/min_terminated_length": 33.4, "epoch": 0.976, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1672640332.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.99375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1529.35078125, "completions/mean_terminated_length": 473.9320007324219, "completions/min_length": 45.4, "completions/min_terminated_length": 45.4, "epoch": 0.992, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1703429364.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9893574617346939, "completions/max_length": 1536.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1523.6888427734375, "completions/mean_terminated_length": 363.3791961669922, "completions/min_length": 24.5, "completions/min_terminated_length": 24.5, "epoch": 0.9984, "num_tokens": 1715682258.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.0, "rewards/confidence_uniqueness_reward": 0.0, "rewards/format_reward": 0.0, "rewards/frontier_aurc_reward": 0.0, "rewards/frontier_coverage_1": 0.0, "rewards/frontier_coverage_10": 0.0, "rewards/frontier_coverage_15": 0.0, "rewards/frontier_coverage_20": 0.0, "rewards/frontier_coverage_25": 0.0, "rewards/frontier_coverage_5": 0.0, "rewards/frontier_ece_reward": 0.0, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.0, "signal/advantage_pre_scale_abs_mean": 0.0, "signal/advantage_pre_scale_std": 0.0, "signal/advantage_std": 0.0, "signal/brier_reward/centered_abs_mean": 0.0, "signal/brier_reward/group_std_mean": 0.0, "signal/brier_reward/group_zero_std_frac": 1.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/group_std_mean": 0.0, "signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0, "signal/frontier_aurc_reward/group_std_mean": 0.0, "signal/frontier_aurc_reward/group_zero_std_frac": 1.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/centered_abs_mean": 0.0, "signal/frontier_coverage_1/group_std_mean": 0.0, "signal/frontier_coverage_1/group_zero_std_frac": 1.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/centered_abs_mean": 0.0, "signal/frontier_coverage_10/group_std_mean": 0.0, "signal/frontier_coverage_10/group_zero_std_frac": 1.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/centered_abs_mean": 0.0, "signal/frontier_coverage_15/group_std_mean": 0.0, "signal/frontier_coverage_15/group_zero_std_frac": 1.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/centered_abs_mean": 0.0, "signal/frontier_coverage_20/group_std_mean": 0.0, "signal/frontier_coverage_20/group_zero_std_frac": 1.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/centered_abs_mean": 0.0, "signal/frontier_coverage_25/group_std_mean": 0.0, "signal/frontier_coverage_25/group_zero_std_frac": 1.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/centered_abs_mean": 0.0, "signal/frontier_coverage_5/group_std_mean": 0.0, "signal/frontier_coverage_5/group_zero_std_frac": 1.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/centered_abs_mean": 0.0, "signal/frontier_ece_reward/group_std_mean": 0.0, "signal/frontier_ece_reward/group_zero_std_frac": 1.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0, "step": 312, "total_flos": 0.0, "train_loss": 0.007585880621217, "train_runtime": 111863.6752, "train_samples_per_second": 0.179, "train_steps_per_second": 0.003 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1715682258, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }