{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5038101588421379, "calibration/batch_distribution_entropy": 0.27496546226898255, "calibration/batch_entropy_100bins": 0.34693576898254996, "calibration/batch_entropy_10bins": 0.27496546226898255, "calibration/batch_entropy_50bins": 0.40406129441079397, "calibration/batch_uniqueness": 0.4937993944856712, "calibration/confidence_entropy": 0.21554487482269122, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4653161376360508, "calibration/mean_confidence": 0.9164561588356814, "calibration/prompt_uniqueness": 0.35475345061773306, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01909722222222221, "completions/max_length": 4034.2, "completions/max_terminated_length": 4034.2, "completions/mean_length": 514.2317749023438, "completions/mean_terminated_length": 524.245458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.8, "epoch": 0.011999850001874977, "grad_norm": 0.005631071049720049, "learning_rate": 5.952380952380953e-07, "loss": 0.0022, "num_tokens": 9038158.0, "reward": 0.6567242383956909, "reward_std": 0.6597963333129883, "rewards/accuracy_reward": 0.26137152314186096, "rewards/brier_reward": 0.3122586965560913, "rewards/confidence_uniqueness_reward": 0.2905979037284851, "rewards/format_reward": 0.5998263835906983, "rewards/frontier_aurc_reward": 0.27479134798049926, "rewards/frontier_coverage_0": 0.27479134798049926, "rewards/frontier_coverage_1": 0.27479134798049926, "rewards/frontier_coverage_10": 0.27479134798049926, "rewards/frontier_coverage_15": 0.27479134798049926, "rewards/frontier_coverage_20": 0.27479134798049926, "rewards/frontier_coverage_25": 0.27479134798049926, "rewards/frontier_coverage_5": 0.27479134798049926, "rewards/frontier_ece_reward": 0.27479134798049926, "rewards/frontier_entropy_batch_reward": -0.574283504486084, "signal/accuracy_reward/centered_abs_mean": 0.3058105528354645, "signal/accuracy_reward/group_bin_occupancy": 0.23923611111111112, "signal/accuracy_reward/group_std_mean": 0.366261488199234, "signal/accuracy_reward/group_zero_std_frac": 0.0861111119389534, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15290527641773224, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15290527641773224, "signal/advantage_abs_mean": 0.5603552341461182, "signal/advantage_pre_scale_abs_mean": 0.5603552341461182, "signal/advantage_pre_scale_std": 0.6780304908752441, "signal/advantage_std": 0.6780304908752441, "signal/brier_reward/centered_abs_mean": 0.31586124300956725, "signal/brier_reward/group_bin_occupancy": 0.5177083333333333, "signal/brier_reward/group_std_mean": 0.36988683938980105, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031586124747991565, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031586124747991565, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23471923768520356, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.5965277777777778, "signal/confidence_uniqueness_reward/group_std_mean": 0.2862655222415924, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023471924290060998, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023471924290060998, "signal/format_reward/centered_abs_mean": 0.438910585641861, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.4741047382354736, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2194552928209305, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2194552928209305, "signal/frontier_aurc_reward/centered_abs_mean": 0.3059499800205231, "signal/frontier_aurc_reward/group_bin_occupancy": 0.39826388888888886, "signal/frontier_aurc_reward/group_std_mean": 0.3646116256713867, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038243749178946016, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038243749178946016, "signal/frontier_coverage_0/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_0/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_0/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_1/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_1/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_1/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_10/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_10/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_10/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_15/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_15/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_15/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_20/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_20/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_20/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_25/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_25/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_25/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_5/centered_abs_mean": 0.3059499800205231, "signal/frontier_coverage_5/group_bin_occupancy": 0.39826388888888886, "signal/frontier_coverage_5/group_std_mean": 0.3646116256713867, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_ece_reward/centered_abs_mean": 0.3059499800205231, "signal/frontier_ece_reward/group_bin_occupancy": 0.39826388888888886, "signal/frontier_ece_reward/group_std_mean": 0.3646116256713867, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030594999343156813, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4482546389102936, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2986111111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4814043164253235, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.044825464487075806, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044825464487075806, "step": 5 }, { "calibration/aurc": 0.544070730834551, "calibration/batch_distribution_entropy": 0.2873650497976389, "calibration/batch_entropy_100bins": 0.3592202535931412, "calibration/batch_entropy_10bins": 0.2873650497976389, "calibration/batch_entropy_50bins": 0.41561871184308197, "calibration/batch_uniqueness": 0.5038269582994224, "calibration/confidence_entropy": 0.21970684330493354, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48642087721955046, "calibration/mean_confidence": 0.9148550403550793, "calibration/prompt_uniqueness": 0.396802208624482, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017447916666666653, "completions/max_length": 4013.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 480.61961059570314, "completions/mean_terminated_length": 489.2898864746094, "completions/min_length": 0.0, "completions/min_terminated_length": 9.4, "epoch": 0.023999700003749954, "grad_norm": 0.005415608175098896, "learning_rate": 1.1904761904761906e-06, "loss": -0.0015, "num_tokens": 17657616.0, "reward": 0.7464439153671265, "reward_std": 0.6387369275093079, "rewards/accuracy_reward": 0.2859374940395355, "rewards/brier_reward": 0.34971494078636167, "rewards/confidence_uniqueness_reward": 0.3477144420146942, "rewards/format_reward": 0.7115451335906983, "rewards/frontier_aurc_reward": 0.302881646156311, "rewards/frontier_coverage_0": 0.302881646156311, "rewards/frontier_coverage_1": 0.302881646156311, "rewards/frontier_coverage_10": 0.302881646156311, "rewards/frontier_coverage_15": 0.302881646156311, "rewards/frontier_coverage_20": 0.302881646156311, "rewards/frontier_coverage_25": 0.302881646156311, "rewards/frontier_coverage_5": 0.302881646156311, "rewards/frontier_ece_reward": 0.302881646156311, "rewards/frontier_entropy_batch_reward": -0.6813170671463012, "signal/accuracy_reward/centered_abs_mean": 0.31569010615348814, "signal/accuracy_reward/group_bin_occupancy": 0.24131944444444448, "signal/accuracy_reward/group_std_mean": 0.37748109102249144, "signal/accuracy_reward/group_zero_std_frac": 0.06944444514811039, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15784505307674407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15784505307674407, "signal/advantage_abs_mean": 0.533976936340332, "signal/advantage_pre_scale_abs_mean": 0.533976936340332, "signal/advantage_pre_scale_std": 0.6536542654037476, "signal/advantage_std": 0.6536542654037476, "signal/brier_reward/centered_abs_mean": 0.3154709577560425, "signal/brier_reward/group_bin_occupancy": 0.5368055555555555, "signal/brier_reward/group_std_mean": 0.36963658332824706, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03154709674417973, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03154709674417973, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22943984270095824, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6229166666666667, "signal/confidence_uniqueness_reward/group_std_mean": 0.28361154794692994, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02294398322701454, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02294398322701454, "signal/format_reward/centered_abs_mean": 0.3570583701133728, "signal/format_reward/group_bin_occupancy": 0.24965277777777778, "signal/format_reward/group_std_mean": 0.42141222953796387, "signal/format_reward/group_zero_std_frac": 0.002777777798473835, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1785291850566864, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1785291850566864, "signal/frontier_aurc_reward/centered_abs_mean": 0.3122061789035797, "signal/frontier_aurc_reward/group_bin_occupancy": 0.41736111111111107, "signal/frontier_aurc_reward/group_std_mean": 0.37071062326431276, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003902577608823776, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003902577608823776, "signal/frontier_coverage_0/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_0/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_0/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_1/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_1/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_1/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_10/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_10/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_10/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_15/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_15/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_15/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_20/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_20/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_20/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_25/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_25/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_25/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_5/centered_abs_mean": 0.3122061789035797, "signal/frontier_coverage_5/group_bin_occupancy": 0.41736111111111107, "signal/frontier_coverage_5/group_std_mean": 0.37071062326431276, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_ece_reward/centered_abs_mean": 0.3122061789035797, "signal/frontier_ece_reward/group_bin_occupancy": 0.41736111111111107, "signal/frontier_ece_reward/group_std_mean": 0.37071062326431276, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03122062087059021, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38358516097068784, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3041666666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4429487228393555, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0383585162460804, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0383585162460804, "step": 10 }, { "calibration/aurc": 0.5192184232870535, "calibration/batch_distribution_entropy": 0.2858053834632349, "calibration/batch_entropy_100bins": 0.359574296826191, "calibration/batch_entropy_10bins": 0.2858053834632349, "calibration/batch_entropy_50bins": 0.4153101505392646, "calibration/batch_uniqueness": 0.5221154909357117, "calibration/confidence_entropy": 0.22962418332165271, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49317452981313137, "calibration/mean_confidence": 0.9151010337113051, "calibration/prompt_uniqueness": 0.42620232373241934, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01519097222222221, "completions/max_length": 3977.2, "completions/max_terminated_length": 3977.2, "completions/mean_length": 456.0194458007812, "completions/mean_terminated_length": 463.06640625, "completions/min_length": 0.0, "completions/min_terminated_length": 40.2, "epoch": 0.03599955000562493, "grad_norm": 0.0063781049102544785, "learning_rate": 1.7857142857142859e-06, "loss": -0.0139, "num_tokens": 26012944.0, "reward": 0.9145050406455993, "reward_std": 0.5620147705078125, "rewards/accuracy_reward": 0.3283854126930237, "rewards/brier_reward": 0.4270216226577759, "rewards/confidence_uniqueness_reward": 0.49301198720932005, "rewards/format_reward": 0.9131076216697693, "rewards/frontier_aurc_reward": 0.35533509850502015, "rewards/frontier_coverage_0": 0.35533509850502015, "rewards/frontier_coverage_1": 0.35533509850502015, "rewards/frontier_coverage_10": 0.35533509850502015, "rewards/frontier_coverage_15": 0.35533509850502015, "rewards/frontier_coverage_20": 0.35533509850502015, "rewards/frontier_coverage_25": 0.35533509850502015, "rewards/frontier_coverage_5": 0.35533509850502015, "rewards/frontier_ece_reward": 0.35533509850502015, "rewards/frontier_entropy_batch_reward": -0.8695465922355652, "signal/accuracy_reward/centered_abs_mean": 0.312841796875, "signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334, "signal/accuracy_reward/group_std_mean": 0.3740617513656616, "signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1564208984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1564208984375, "signal/advantage_abs_mean": 0.46465239524841306, "signal/advantage_pre_scale_abs_mean": 0.46465239524841306, "signal/advantage_pre_scale_std": 0.5796213746070862, "signal/advantage_std": 0.5796213746070862, "signal/brier_reward/centered_abs_mean": 0.2964577376842499, "signal/brier_reward/group_bin_occupancy": 0.590625, "signal/brier_reward/group_std_mean": 0.35084177255630494, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029645774513483047, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.029645774513483047, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18456263542175294, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6329861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.23556708097457885, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018456263840198515, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018456263840198515, "signal/format_reward/centered_abs_mean": 0.14099934846162795, "signal/format_reward/group_bin_occupancy": 0.22256944444444446, "signal/format_reward/group_std_mean": 0.22586170136928557, "signal/format_reward/group_zero_std_frac": 0.21944445110857486, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07049967423081398, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.07049967423081398, "signal/frontier_aurc_reward/centered_abs_mean": 0.30630324482917787, "signal/frontier_aurc_reward/group_bin_occupancy": 0.4604166666666666, "signal/frontier_aurc_reward/group_std_mean": 0.3642661988735199, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038287907373160124, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038287907373160124, "signal/frontier_coverage_0/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_0/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_0/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_1/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_1/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_1/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_10/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_10/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_10/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_15/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_15/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_15/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_20/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_20/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_20/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_25/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_25/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_25/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_5/centered_abs_mean": 0.30630324482917787, "signal/frontier_coverage_5/group_bin_occupancy": 0.4604166666666666, "signal/frontier_coverage_5/group_std_mean": 0.3642661988735199, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_ece_reward/centered_abs_mean": 0.30630324482917787, "signal/frontier_ece_reward/group_bin_occupancy": 0.4604166666666666, "signal/frontier_ece_reward/group_std_mean": 0.3642661988735199, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0306303258985281, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2086976408958435, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3149305555555556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31270697712898254, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06111111324280501, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02086976356804371, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02086976356804371, "step": 15 }, { "calibration/aurc": 0.443977124474033, "calibration/batch_distribution_entropy": 0.43868894998445, "calibration/batch_entropy_100bins": 0.40972118599624574, "calibration/batch_entropy_10bins": 0.43868894998445, "calibration/batch_entropy_50bins": 0.4759905332800923, "calibration/batch_uniqueness": 0.6093611651716276, "calibration/buffer_distribution_entropy": 0.32067142272542265, "calibration/buffer_entropy_100bins": 0.38150793304591346, "calibration/buffer_entropy_10bins": 0.32067142272542265, "calibration/buffer_entropy_50bins": 0.440799581674695, "calibration/confidence_entropy": 0.32815220193055084, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.024802110817941952, "calibration/coverage@30%": 0.05460019151677149, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3718585004618767, "calibration/mean_confidence": 0.8746366326346813, "calibration/prompt_uniqueness": 0.5123440414977478, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555536, "completions/max_length": 3978.4, "completions/max_terminated_length": 3978.4, "completions/mean_length": 488.863720703125, "completions/mean_terminated_length": 493.5744262695313, "completions/min_length": 0.0, "completions/min_terminated_length": 101.2, "epoch": 0.04799940000749991, "grad_norm": 0.0008680089376866817, "learning_rate": 2.380952380952381e-06, "loss": -0.006, "num_tokens": 34758350.0, "reward": 0.9097142934799194, "reward_std": 0.34938407242298125, "rewards/accuracy_reward": 0.44947916865348814, "rewards/brier_reward": 0.5689934015274047, "rewards/confidence_uniqueness_reward": 0.6051031112670898, "rewards/format_reward": 0.9827256917953491, "rewards/frontier_aurc_reward": 0.20144999362528324, "rewards/frontier_coverage_0": 0.2116093705408275, "rewards/frontier_coverage_1": 0.2116093705408275, "rewards/frontier_coverage_10": 0.2116093705408275, "rewards/frontier_coverage_15": 0.2116093705408275, "rewards/frontier_coverage_20": 0.2116093705408275, "rewards/frontier_coverage_25": 0.2116093705408275, "rewards/frontier_coverage_5": 0.2116093705408275, "rewards/frontier_ece_reward": 0.19258032105863093, "rewards/frontier_entropy_batch_reward": -0.9370049834251404, "signal/accuracy_reward/centered_abs_mean": 0.2896375894546509, "signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334, "signal/accuracy_reward/group_std_mean": 0.3589329898357391, "signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14481879472732545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14481879472732545, "signal/advantage_abs_mean": 0.2791608601808548, "signal/advantage_pre_scale_abs_mean": 0.2791608601808548, "signal/advantage_pre_scale_std": 0.36436753273010253, "signal/advantage_std": 0.36436753273010253, "signal/brier_reward/centered_abs_mean": 0.2495465785264969, "signal/brier_reward/group_bin_occupancy": 0.6586805555555556, "signal/brier_reward/group_std_mean": 0.3062271773815155, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024954657629132272, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024954657629132272, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17777037620544434, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6329861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.21108138859272002, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017777037993073463, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017777037993073463, "signal/format_reward/centered_abs_mean": 0.03138563297688961, "signal/format_reward/group_bin_occupancy": 0.16562499999999997, "signal/format_reward/group_std_mean": 0.06965429857373237, "signal/format_reward/group_zero_std_frac": 0.675, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015692816488444804, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015692816488444804, "signal/frontier_aurc_reward/centered_abs_mean": 0.1230311962775886, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6243055555555556, "signal/frontier_aurc_reward/group_std_mean": 0.15198022853583099, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0015378899362985976, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0015378899362985976, "signal/frontier_coverage_0/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_0/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_0/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_1/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_1/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_1/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_10/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_10/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_10/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_15/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_15/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_15/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_20/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_20/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_20/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_25/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_25/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_25/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_5/centered_abs_mean": 0.14054877683520317, "signal/frontier_coverage_5/group_bin_occupancy": 0.6204861111111112, "signal/frontier_coverage_5/group_std_mean": 0.18199999555945395, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.014054877683520317, "signal/frontier_ece_reward/centered_abs_mean": 0.2141006052494049, "signal/frontier_ece_reward/group_bin_occupancy": 0.5802083333333334, "signal/frontier_ece_reward/group_std_mean": 0.264327472448349, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021410060301423072, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021410060301423072, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10964042991399765, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2777777777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.20423128306865693, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.25555555820465087, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01096404269337654, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01096404269337654, "step": 20 }, { "calibration/aurc": 0.35032314868572345, "calibration/batch_distribution_entropy": 0.614438890013979, "calibration/batch_entropy_100bins": 0.4634310297287791, "calibration/batch_entropy_10bins": 0.614438890013979, "calibration/batch_entropy_50bins": 0.5428867358379499, "calibration/batch_uniqueness": 0.7195552397935103, "calibration/buffer_distribution_entropy": 0.38324780384371143, "calibration/buffer_entropy_100bins": 0.4074862124458683, "calibration/buffer_entropy_10bins": 0.38324780384371143, "calibration/buffer_entropy_50bins": 0.4720090353275187, "calibration/confidence_entropy": 0.41065394887495243, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.005221932114882507, "calibration/coverage@15%": 0.0720626631853786, "calibration/coverage@20%": 0.13929356175566165, "calibration/coverage@25%": 0.3522492768234537, "calibration/coverage@30%": 0.4206896551724138, "calibration/coverage@5%": 0.0, "calibration/ece": 0.23885528444470966, "calibration/mean_confidence": 0.8212928714555188, "calibration/prompt_uniqueness": 0.6167993494757859, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009809027777777767, "completions/max_length": 3817.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 543.0953979492188, "completions/mean_terminated_length": 548.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.6, "epoch": 0.05999925000937488, "grad_norm": 0.0006207867991179228, "learning_rate": 2.9761904761904763e-06, "loss": -0.0071, "num_tokens": 44139257.0, "reward": 0.8193583488464355, "reward_std": 0.20146073400974274, "rewards/accuracy_reward": 0.5576388835906982, "rewards/brier_reward": 0.6787164211273193, "rewards/confidence_uniqueness_reward": 0.7089365482330322, "rewards/format_reward": 0.9884548544883728, "rewards/frontier_aurc_reward": -0.0040108742192387584, "rewards/frontier_coverage_0": 0.0012683632783591747, "rewards/frontier_coverage_1": 0.0012683632783591747, "rewards/frontier_coverage_10": 0.0012683632783591747, "rewards/frontier_coverage_15": 0.0012683632783591747, "rewards/frontier_coverage_20": 0.0012683632783591747, "rewards/frontier_coverage_25": 0.0012683632783591747, "rewards/frontier_coverage_5": 0.0012683632783591747, "rewards/frontier_ece_reward": 0.018515123042743654, "rewards/frontier_entropy_batch_reward": -0.9514306664466858, "signal/accuracy_reward/centered_abs_mean": 0.2662000864744186, "signal/accuracy_reward/group_bin_occupancy": 0.23472222222222222, "signal/accuracy_reward/group_std_mean": 0.3329444944858551, "signal/accuracy_reward/group_zero_std_frac": 0.1222222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1331000432372093, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1331000432372093, "signal/advantage_abs_mean": 0.15778279304504395, "signal/advantage_pre_scale_abs_mean": 0.15778279304504395, "signal/advantage_pre_scale_std": 0.21362167000770568, "signal/advantage_std": 0.21362167000770568, "signal/brier_reward/centered_abs_mean": 0.19935325980186464, "signal/brier_reward/group_bin_occupancy": 0.7305555555555555, "signal/brier_reward/group_std_mean": 0.2499818593263626, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019935326650738716, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019935326650738716, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09970465749502182, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6690972222222223, "signal/confidence_uniqueness_reward/group_std_mean": 0.12825550884008408, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009970465674996376, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009970465674996376, "signal/format_reward/centered_abs_mean": 0.02030707523226738, "signal/format_reward/group_bin_occupancy": 0.14548611111111112, "signal/format_reward/group_std_mean": 0.039225579053163526, "signal/format_reward/group_zero_std_frac": 0.8361111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01015353761613369, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01015353761613369, "signal/frontier_aurc_reward/centered_abs_mean": 0.002486881613731384, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7027777777777778, "signal/frontier_aurc_reward/group_std_mean": 0.0036755402106791735, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.108602177235298e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.108602177235298e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_0/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_0/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_1/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_1/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_1/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_10/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_10/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_10/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_15/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_15/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_15/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_20/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_20/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_20/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_25/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_25/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_25/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_5/centered_abs_mean": 0.04731389135122299, "signal/frontier_coverage_5/group_bin_occupancy": 0.7815972222222223, "signal/frontier_coverage_5/group_std_mean": 0.07089887708425521, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004731389414519072, "signal/frontier_ece_reward/centered_abs_mean": 0.1279986619949341, "signal/frontier_ece_reward/group_bin_occupancy": 0.6677083333333333, "signal/frontier_ece_reward/group_std_mean": 0.16064732670783996, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012799866311252118, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012799866311252118, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08364634066820145, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.24097222222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.16521921157836914, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3694444537162781, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.008364634215831756, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.008364634215831756, "step": 25 }, { "calibration/aurc": 0.28677085956170895, "calibration/batch_distribution_entropy": 0.680235851538608, "calibration/batch_entropy_100bins": 0.4680978920662368, "calibration/batch_entropy_10bins": 0.680235851538608, "calibration/batch_entropy_50bins": 0.5495048823241069, "calibration/batch_uniqueness": 0.7290616274911208, "calibration/buffer_distribution_entropy": 0.47353364621032734, "calibration/buffer_entropy_100bins": 0.443610624399519, "calibration/buffer_entropy_10bins": 0.47353364621032734, "calibration/buffer_entropy_50bins": 0.5154921368659136, "calibration/confidence_entropy": 0.48697551131200045, "calibration/coverage@0%": 0.004244966999994339, "calibration/coverage@1%": 0.004244966999994339, "calibration/coverage@10%": 0.004244966999994339, "calibration/coverage@15%": 0.027774378764700226, "calibration/coverage@20%": 0.0572825754860117, "calibration/coverage@25%": 0.2620508574117625, "calibration/coverage@30%": 0.591682610683416, "calibration/coverage@5%": 0.004244966999994339, "calibration/ece": 0.13998204933279731, "calibration/mean_confidence": 0.7710976143451138, "calibration/prompt_uniqueness": 0.6168338382498494, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01519097222222221, "completions/max_length": 3910.8, "completions/max_terminated_length": 3910.8, "completions/mean_length": 618.71025390625, "completions/mean_terminated_length": 628.3085815429688, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.07199910001124986, "grad_norm": 0.0011630720691755414, "learning_rate": 3.5714285714285718e-06, "loss": -0.0095, "num_tokens": 54376719.0, "reward": 0.8367651104927063, "reward_std": 0.17993904650211334, "rewards/accuracy_reward": 0.5965277910232544, "rewards/brier_reward": 0.7198675394058227, "rewards/confidence_uniqueness_reward": 0.7175234079360961, "rewards/format_reward": 0.98359375, "rewards/frontier_aurc_reward": -0.003244720213115215, "rewards/frontier_coverage_0": -0.006340815802104771, "rewards/frontier_coverage_1": -0.006340815802104771, "rewards/frontier_coverage_10": -0.006340815802104771, "rewards/frontier_coverage_15": -0.006340815802104771, "rewards/frontier_coverage_20": -0.006340815802104771, "rewards/frontier_coverage_25": -0.006340815802104771, "rewards/frontier_coverage_5": -0.006340815802104771, "rewards/frontier_ece_reward": 0.02065478153526783, "rewards/frontier_entropy_batch_reward": -0.9462109446525574, "signal/accuracy_reward/centered_abs_mean": 0.2334526836872101, "signal/accuracy_reward/group_bin_occupancy": 0.22569444444444448, "signal/accuracy_reward/group_std_mean": 0.29840933680534365, "signal/accuracy_reward/group_zero_std_frac": 0.19444444626569748, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11672634184360504, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11672634184360504, "signal/advantage_abs_mean": 0.13784168660640717, "signal/advantage_pre_scale_abs_mean": 0.13784168660640717, "signal/advantage_pre_scale_std": 0.1956336259841919, "signal/advantage_std": 0.1956336259841919, "signal/brier_reward/centered_abs_mean": 0.1639205902814865, "signal/brier_reward/group_bin_occupancy": 0.7708333333333334, "signal/brier_reward/group_std_mean": 0.20927065908908843, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01639205850660801, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01639205850660801, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10898690223693848, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6690972222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.14019887149333954, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010898690670728683, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010898690670728683, "signal/format_reward/centered_abs_mean": 0.02789171002805233, "signal/format_reward/group_bin_occupancy": 0.15243055555555554, "signal/format_reward/group_std_mean": 0.05320079177618027, "signal/format_reward/group_zero_std_frac": 0.7805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013945855014026166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013945855014026166, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017630874179303646, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7194444444444444, "signal/frontier_aurc_reward/group_std_mean": 0.0026808131486177446, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2038593306206168e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2038593306206168e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_0/group_bin_occupancy": 0.803125, "signal/frontier_coverage_0/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_1/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_1/group_bin_occupancy": 0.803125, "signal/frontier_coverage_1/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_10/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_10/group_bin_occupancy": 0.803125, "signal/frontier_coverage_10/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_15/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_15/group_bin_occupancy": 0.803125, "signal/frontier_coverage_15/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_20/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_20/group_bin_occupancy": 0.803125, "signal/frontier_coverage_20/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_25/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_25/group_bin_occupancy": 0.803125, "signal/frontier_coverage_25/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_5/centered_abs_mean": 0.05981260240077972, "signal/frontier_coverage_5/group_bin_occupancy": 0.803125, "signal/frontier_coverage_5/group_std_mean": 0.08423706740140915, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005981260538101196, "signal/frontier_ece_reward/centered_abs_mean": 0.08587390631437301, "signal/frontier_ece_reward/group_bin_occupancy": 0.6788194444444444, "signal/frontier_ece_reward/group_std_mean": 0.11601630598306656, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008587390463799239, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008587390463799239, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09288413524627685, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.23993055555555554, "signal/frontier_entropy_batch_reward/group_std_mean": 0.18082668483257294, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3861111104488373, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00928841382265091, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00928841382265091, "step": 30 }, { "calibration/aurc": 0.22314636175619795, "calibration/batch_distribution_entropy": 0.6770926741352014, "calibration/batch_entropy_100bins": 0.4808851389304557, "calibration/batch_entropy_10bins": 0.6770926741352014, "calibration/batch_entropy_50bins": 0.560624595713065, "calibration/batch_uniqueness": 0.7191333949569602, "calibration/buffer_distribution_entropy": 0.5387056776964307, "calibration/buffer_entropy_100bins": 0.46868923324209544, "calibration/buffer_entropy_10bins": 0.5387056776964307, "calibration/buffer_entropy_50bins": 0.5452981940231859, "calibration/confidence_entropy": 0.46155691057526704, "calibration/coverage@0%": 0.006835227390771403, "calibration/coverage@1%": 0.006835227390771403, "calibration/coverage@10%": 0.12299082440394646, "calibration/coverage@15%": 0.19852194905110143, "calibration/coverage@20%": 0.43376444645744583, "calibration/coverage@25%": 0.5982394781787371, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.006835227390771403, "calibration/ece": 0.10934520056155941, "calibration/mean_confidence": 0.7851432360640994, "calibration/prompt_uniqueness": 0.5947078897595999, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022222222222222233, "completions/max_length": 3787.8, "completions/max_terminated_length": 3787.8, "completions/mean_length": 656.1105102539062, "completions/mean_terminated_length": 671.1053100585938, "completions/min_length": 0.0, "completions/min_terminated_length": 204.6, "epoch": 0.08399895001312484, "grad_norm": 0.0006542339688166976, "learning_rate": 4.166666666666667e-06, "loss": -0.0139, "num_tokens": 65012552.0, "reward": 0.859082019329071, "reward_std": 0.1734073728322983, "rewards/accuracy_reward": 0.6378472089767456, "rewards/brier_reward": 0.7479536890983581, "rewards/confidence_uniqueness_reward": 0.7046342015266418, "rewards/format_reward": 0.9759548664093017, "rewards/frontier_aurc_reward": -0.0026470940094441174, "rewards/frontier_coverage_0": -0.0049528153613209724, "rewards/frontier_coverage_1": -0.0049528153613209724, "rewards/frontier_coverage_10": -0.0049528153613209724, "rewards/frontier_coverage_15": -0.0049528153613209724, "rewards/frontier_coverage_20": -0.0049528153613209724, "rewards/frontier_coverage_25": -0.0049528153613209724, "rewards/frontier_coverage_5": -0.0049528153613209724, "rewards/frontier_ece_reward": 0.028249557688832284, "rewards/frontier_entropy_batch_reward": -0.9240273833274841, "signal/accuracy_reward/centered_abs_mean": 0.20604383647441865, "signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223, "signal/accuracy_reward/group_std_mean": 0.26489012539386747, "signal/accuracy_reward/group_zero_std_frac": 0.27222221791744233, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10302191823720933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10302191823720933, "signal/advantage_abs_mean": 0.13017865568399428, "signal/advantage_pre_scale_abs_mean": 0.13017865568399428, "signal/advantage_pre_scale_std": 0.19113859236240388, "signal/advantage_std": 0.19113859236240388, "signal/brier_reward/centered_abs_mean": 0.15287761092185975, "signal/brier_reward/group_bin_occupancy": 0.7430555555555556, "signal/brier_reward/group_std_mean": 0.19788565933704377, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015287761203944683, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015287761203944683, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.13704033493995665, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6673611111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.16784389913082123, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013704033941030503, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013704033941030503, "signal/format_reward/centered_abs_mean": 0.035107421875, "signal/format_reward/group_bin_occupancy": 0.15416666666666665, "signal/format_reward/group_std_mean": 0.060845568776130676, "signal/format_reward/group_zero_std_frac": 0.7666666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0175537109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0175537109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018307951977476478, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7076388888888889, "signal/frontier_aurc_reward/group_std_mean": 0.002829930419102311, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.28849399718456e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.28849399718456e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_0/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_0/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_1/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_1/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_1/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_10/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_10/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_10/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_15/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_15/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_15/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_20/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_20/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_20/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_25/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_25/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_25/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_5/centered_abs_mean": 0.060160938650369644, "signal/frontier_coverage_5/group_bin_occupancy": 0.7854166666666668, "signal/frontier_coverage_5/group_std_mean": 0.0857668623328209, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006016094330698251, "signal/frontier_ece_reward/centered_abs_mean": 0.07295427918434143, "signal/frontier_ece_reward/group_bin_occupancy": 0.6996527777777778, "signal/frontier_ece_reward/group_std_mean": 0.1002663567662239, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007295427843928337, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007295427843928337, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12212611138820648, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.26041666666666663, "signal/frontier_entropy_batch_reward/group_std_mean": 0.22245014905929567, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.28055555522441866, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012212611176073552, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012212611176073552, "step": 35 }, { "calibration/aurc": 0.23646651151545467, "calibration/batch_distribution_entropy": 0.6921987832818134, "calibration/batch_entropy_100bins": 0.5484838042629623, "calibration/batch_entropy_10bins": 0.6921987832818134, "calibration/batch_entropy_50bins": 0.6198041888894421, "calibration/batch_uniqueness": 0.7559171649993487, "calibration/buffer_distribution_entropy": 0.5720327716762361, "calibration/buffer_entropy_100bins": 0.4857154481548491, "calibration/buffer_entropy_10bins": 0.5720327716762361, "calibration/buffer_entropy_50bins": 0.5634428126437894, "calibration/confidence_entropy": 0.4215632545793825, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.08446315789473684, "calibration/coverage@15%": 0.14356491228070176, "calibration/coverage@20%": 0.4184106637350557, "calibration/coverage@25%": 0.5767427952208551, "calibration/coverage@30%": 0.7565249757858411, "calibration/coverage@5%": 0.021333333333333336, "calibration/ece": 0.13757307498135848, "calibration/mean_confidence": 0.7957711268796196, "calibration/prompt_uniqueness": 0.6417560778644721, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017447916666666653, "completions/max_length": 3826.8, "completions/max_terminated_length": 3826.8, "completions/mean_length": 688.469970703125, "completions/mean_terminated_length": 700.6824340820312, "completions/min_length": 0.0, "completions/min_terminated_length": 203.4, "epoch": 0.09599880001499982, "grad_norm": 0.0027070348151028156, "learning_rate": 4.761904761904762e-06, "loss": -0.0145, "num_tokens": 76063246.0, "reward": 0.8858483791351318, "reward_std": 0.18059354424476623, "rewards/accuracy_reward": 0.643749988079071, "rewards/brier_reward": 0.7548895835876465, "rewards/confidence_uniqueness_reward": 0.7504566788673401, "rewards/format_reward": 0.9811631917953492, "rewards/frontier_aurc_reward": -0.0025548926088958977, "rewards/frontier_coverage_0": 0.005814270488917828, "rewards/frontier_coverage_1": 0.005814270488917828, "rewards/frontier_coverage_10": 0.005814270488917828, "rewards/frontier_coverage_15": 0.005814270488917828, "rewards/frontier_coverage_20": 0.005814270488917828, "rewards/frontier_coverage_25": 0.005814270488917828, "rewards/frontier_coverage_5": 0.005814270488917828, "rewards/frontier_ece_reward": 0.03743142113089561, "rewards/frontier_entropy_batch_reward": -0.8492406964302063, "signal/accuracy_reward/centered_abs_mean": 0.19522569477558135, "signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223, "signal/accuracy_reward/group_std_mean": 0.258097830414772, "signal/accuracy_reward/group_zero_std_frac": 0.27222221791744233, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09761284738779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09761284738779068, "signal/advantage_abs_mean": 0.13384985327720642, "signal/advantage_pre_scale_abs_mean": 0.13384985327720642, "signal/advantage_pre_scale_std": 0.20037249326705933, "signal/advantage_std": 0.20037249326705933, "signal/brier_reward/centered_abs_mean": 0.15923767983913423, "signal/brier_reward/group_bin_occupancy": 0.7100694444444444, "signal/brier_reward/group_std_mean": 0.20939326882362366, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01592376921325922, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01592376921325922, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.14255098551511763, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6333333333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.170270636677742, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01425509825348854, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01425509825348854, "signal/format_reward/centered_abs_mean": 0.03126627653837204, "signal/format_reward/group_bin_occupancy": 0.15243055555555557, "signal/format_reward/group_std_mean": 0.05585132986307144, "signal/format_reward/group_zero_std_frac": 0.7805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01563313826918602, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01563313826918602, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027065142057836056, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6763888888888889, "signal/frontier_aurc_reward/group_std_mean": 0.004190942086279392, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3831426480901425e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3831426480901425e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_0/group_bin_occupancy": 0.734375, "signal/frontier_coverage_0/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_1/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_1/group_bin_occupancy": 0.734375, "signal/frontier_coverage_1/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_10/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_10/group_bin_occupancy": 0.734375, "signal/frontier_coverage_10/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_15/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_15/group_bin_occupancy": 0.734375, "signal/frontier_coverage_15/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_20/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_20/group_bin_occupancy": 0.734375, "signal/frontier_coverage_20/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_25/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_25/group_bin_occupancy": 0.734375, "signal/frontier_coverage_25/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_5/centered_abs_mean": 0.06825486421585084, "signal/frontier_coverage_5/group_bin_occupancy": 0.734375, "signal/frontier_coverage_5/group_std_mean": 0.10302471965551377, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0068254867568612095, "signal/frontier_ece_reward/centered_abs_mean": 0.08857372999191285, "signal/frontier_ece_reward/group_bin_occupancy": 0.709375, "signal/frontier_ece_reward/group_std_mean": 0.12290655523538589, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008857373148202896, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008857373148202896, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22595709562301636, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.39618055555555554, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34426335990428925, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444496184587, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022595709562301634, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022595709562301634, "step": 40 }, { "calibration/aurc": 0.2071995810953097, "calibration/batch_distribution_entropy": 0.7874181292806145, "calibration/batch_entropy_100bins": 0.7681012233343304, "calibration/batch_entropy_10bins": 0.7874181292806145, "calibration/batch_entropy_50bins": 0.7977830057272545, "calibration/batch_uniqueness": 0.8884160159309301, "calibration/buffer_distribution_entropy": 0.5995247243762318, "calibration/buffer_entropy_100bins": 0.5220286299888405, "calibration/buffer_entropy_10bins": 0.5995247243762318, "calibration/buffer_entropy_50bins": 0.5964127665323024, "calibration/confidence_entropy": 0.3926311926708884, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.09383084341533095, "calibration/coverage@15%": 0.2434566848971281, "calibration/coverage@20%": 0.4486376535129998, "calibration/coverage@25%": 0.8128549818899113, "calibration/coverage@30%": 0.9794736842105263, "calibration/coverage@5%": 0.036011080332409975, "calibration/ece": 0.13640778164325235, "calibration/mean_confidence": 0.754592191385538, "calibration/prompt_uniqueness": 0.7768243776261947, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020486111111111115, "completions/max_length": 3911.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 725.5782104492188, "completions/mean_terminated_length": 740.8074096679687, "completions/min_length": 0.0, "completions/min_terminated_length": 221.6, "epoch": 0.1079986500168748, "grad_norm": 0.0006444323225878179, "learning_rate": 4.909638554216868e-06, "loss": -0.0187, "num_tokens": 87557171.0, "reward": 0.933354115486145, "reward_std": 0.19789280295372008, "rewards/accuracy_reward": 0.6422742962837219, "rewards/brier_reward": 0.7597587704658508, "rewards/confidence_uniqueness_reward": 0.8830222606658935, "rewards/format_reward": 0.9785590291023254, "rewards/frontier_aurc_reward": -0.002267755405046046, "rewards/frontier_coverage_0": 0.01341271074488759, "rewards/frontier_coverage_1": 0.01341271074488759, "rewards/frontier_coverage_10": 0.01341271074488759, "rewards/frontier_coverage_15": 0.01341271074488759, "rewards/frontier_coverage_20": 0.01341271074488759, "rewards/frontier_coverage_25": 0.01341271074488759, "rewards/frontier_coverage_5": 0.01341271074488759, "rewards/frontier_ece_reward": 0.02467528488487005, "rewards/frontier_entropy_batch_reward": -0.5316874802112579, "signal/accuracy_reward/centered_abs_mean": 0.20063476860523224, "signal/accuracy_reward/group_bin_occupancy": 0.21944444444444447, "signal/accuracy_reward/group_std_mean": 0.26551105082035065, "signal/accuracy_reward/group_zero_std_frac": 0.2444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10031738430261612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10031738430261612, "signal/advantage_abs_mean": 0.14899895191192628, "signal/advantage_pre_scale_abs_mean": 0.14899895191192628, "signal/advantage_pre_scale_std": 0.21511842608451842, "signal/advantage_std": 0.21511842608451842, "signal/brier_reward/centered_abs_mean": 0.1772002249956131, "signal/brier_reward/group_bin_occupancy": 0.7756944444444445, "signal/brier_reward/group_std_mean": 0.22932115197181702, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017720023915171624, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017720023915171624, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0835119254887104, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6239583333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.11745427399873734, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008351192437112331, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008351192437112331, "signal/format_reward/centered_abs_mean": 0.03505316786468029, "signal/format_reward/group_bin_occupancy": 0.1579861111111111, "signal/format_reward/group_std_mean": 0.06526159271597862, "signal/format_reward/group_zero_std_frac": 0.7361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017526583932340144, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017526583932340144, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030290879774838688, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6743055555555555, "signal/frontier_aurc_reward/group_std_mean": 0.004711134731769562, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7863600300624964e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7863600300624964e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_0/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_0/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_1/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_1/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_1/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_10/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_10/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_10/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_15/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_15/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_15/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_20/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_20/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_20/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_25/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_25/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_25/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_5/centered_abs_mean": 0.13084534853696822, "signal/frontier_coverage_5/group_bin_occupancy": 0.7878472222222223, "signal/frontier_coverage_5/group_std_mean": 0.18352725505828857, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.013084535114467144, "signal/frontier_ece_reward/centered_abs_mean": 0.08225937336683273, "signal/frontier_ece_reward/group_bin_occupancy": 0.7489583333333334, "signal/frontier_ece_reward/group_std_mean": 0.12157966494560242, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008225937373936176, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008225937373936176, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39211310148239137, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.685763888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4667708516120911, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0392113134264946, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0392113134264946, "step": 45 }, { "calibration/aurc": 0.3759205151272904, "calibration/batch_distribution_entropy": 0.9313989667528583, "calibration/batch_entropy_100bins": 0.9212269758760405, "calibration/batch_entropy_10bins": 0.9313989667528583, "calibration/batch_entropy_50bins": 0.9342826864618716, "calibration/batch_uniqueness": 0.9449244879054509, "calibration/buffer_distribution_entropy": 0.6569622120148912, "calibration/buffer_entropy_100bins": 0.599770542385045, "calibration/buffer_entropy_10bins": 0.6569622120148912, "calibration/buffer_entropy_50bins": 0.6646205868242869, "calibration/confidence_entropy": 0.424989166098224, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.007407407407407407, "calibration/coverage@15%": 0.01596355714002773, "calibration/coverage@20%": 0.04248083071612483, "calibration/coverage@25%": 0.0968950909656792, "calibration/coverage@30%": 0.3587556687321393, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22700673017839854, "calibration/mean_confidence": 0.608741047048681, "calibration/prompt_uniqueness": 0.8550910596171892, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017708333333333347, "completions/max_length": 3661.0, "completions/max_terminated_length": 3661.0, "completions/mean_length": 720.4263061523437, "completions/mean_terminated_length": 733.3709228515625, "completions/min_length": 0.0, "completions/min_terminated_length": 204.4, "epoch": 0.11999850001874976, "grad_norm": 0.0005708423559553921, "learning_rate": 4.759036144578314e-06, "loss": -0.0213, "num_tokens": 98954082.0, "reward": 0.9437960147857666, "reward_std": 0.213465416431427, "rewards/accuracy_reward": 0.6332465291023255, "rewards/brier_reward": 0.7396142482757568, "rewards/confidence_uniqueness_reward": 0.9182653784751892, "rewards/format_reward": 0.9818576335906982, "rewards/frontier_aurc_reward": -0.002504592388868332, "rewards/frontier_coverage_0": 0.00757271870970726, "rewards/frontier_coverage_1": 0.00757271870970726, "rewards/frontier_coverage_10": 0.00757271870970726, "rewards/frontier_coverage_15": 0.00757271870970726, "rewards/frontier_coverage_20": 0.00757271870970726, "rewards/frontier_coverage_25": 0.00757271870970726, "rewards/frontier_coverage_5": 0.00757271870970726, "rewards/frontier_ece_reward": 0.012652286747470497, "rewards/frontier_entropy_batch_reward": -0.3607885718345642, "signal/accuracy_reward/centered_abs_mean": 0.19530707597732544, "signal/accuracy_reward/group_bin_occupancy": 0.21215277777777777, "signal/accuracy_reward/group_std_mean": 0.2530734747648239, "signal/accuracy_reward/group_zero_std_frac": 0.30277777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09765353798866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09765353798866272, "signal/advantage_abs_mean": 0.16423482298851014, "signal/advantage_pre_scale_abs_mean": 0.16423482298851014, "signal/advantage_pre_scale_std": 0.231108620762825, "signal/advantage_std": 0.231108620762825, "signal/brier_reward/centered_abs_mean": 0.19700363278388977, "signal/brier_reward/group_bin_occupancy": 0.8135416666666666, "signal/brier_reward/group_std_mean": 0.24845842123031617, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019700363650918006, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019700363650918006, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05617346540093422, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.732638888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.08689026236534118, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005617346568033099, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005617346568033099, "signal/format_reward/centered_abs_mean": 0.02997504323720932, "signal/format_reward/group_bin_occupancy": 0.15381944444444445, "signal/format_reward/group_std_mean": 0.05596600547432899, "signal/format_reward/group_zero_std_frac": 0.7694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01498752161860466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01498752161860466, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026452220510691403, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6597222222222222, "signal/frontier_aurc_reward/group_std_mean": 0.004009249992668629, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3065275420085524e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3065275420085524e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_0/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_0/group_std_mean": 0.267059126496315, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_1/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_1/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_1/group_std_mean": 0.267059126496315, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_10/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_10/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_10/group_std_mean": 0.267059126496315, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_15/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_15/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_15/group_std_mean": 0.267059126496315, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_20/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_20/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_20/group_std_mean": 0.267059126496315, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_25/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_25/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_25/group_std_mean": 0.267059126496315, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_5/centered_abs_mean": 0.19767657220363616, "signal/frontier_coverage_5/group_bin_occupancy": 0.8114583333333332, "signal/frontier_coverage_5/group_std_mean": 0.267059126496315, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.019767657667398453, "signal/frontier_ece_reward/centered_abs_mean": 0.05850343108177185, "signal/frontier_ece_reward/group_bin_occupancy": 0.8170138888888889, "signal/frontier_ece_reward/group_std_mean": 0.0853449210524559, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005850343313068151, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005850343313068151, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3828676402568817, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7503472222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4532123267650604, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038286763429641726, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038286763429641726, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.23050248842126433, "eval_calibration/batch_distribution_entropy": 0.8016154652334767, "eval_calibration/batch_entropy_100bins": 0.6871765896490878, "eval_calibration/batch_entropy_10bins": 0.8016154652334767, "eval_calibration/batch_entropy_50bins": 0.7412875223322276, "eval_calibration/batch_uniqueness": 0.8857323642906695, "eval_calibration/buffer_distribution_entropy": 0.6890677434360816, "eval_calibration/buffer_entropy_100bins": 0.6437866546226053, "eval_calibration/buffer_entropy_10bins": 0.6890677434360816, "eval_calibration/buffer_entropy_50bins": 0.7017015832612122, "eval_calibration/confidence_entropy": 0.40555521402257555, "eval_calibration/coverage@0%": 0.10131048387096775, "eval_calibration/coverage@1%": 0.10131048387096775, "eval_calibration/coverage@10%": 0.21959005376344085, "eval_calibration/coverage@15%": 0.34677419354838707, "eval_calibration/coverage@20%": 0.6031586021505376, "eval_calibration/coverage@25%": 0.7580645161290324, "eval_calibration/coverage@30%": 0.926747311827957, "eval_calibration/coverage@5%": 0.10131048387096775, "eval_calibration/ece": 0.23310729201976518, "eval_calibration/mean_confidence": 0.6996752747878766, "eval_calibration/prompt_uniqueness": 0.8857323642906695, "eval_completions/clipped_ratio": 0.013888888888888876, "eval_completions/max_length": 2195.1666666666665, "eval_completions/max_terminated_length": 2195.1666666666665, "eval_completions/mean_length": 699.8819681803385, "eval_completions/mean_terminated_length": 709.8047892252604, "eval_completions/min_length": 55.666666666666664, "eval_completions/min_terminated_length": 263.5, "eval_loss": 0.0, "eval_num_tokens": 98954082.0, "eval_reward": 0.9046729604403178, "eval_reward_std": 0.2568800052007039, "eval_rewards/accuracy_reward": 0.6414930621782938, "eval_rewards/brier_reward": 0.7675978740056356, "eval_rewards/confidence_uniqueness_reward": 0.8675300975640615, "eval_rewards/format_reward": 0.9861111044883728, "eval_rewards/frontier_aurc_reward": -0.002697653331172963, "eval_rewards/frontier_coverage_0": 0.034657815316071115, "eval_rewards/frontier_coverage_1": 0.034657815316071115, "eval_rewards/frontier_coverage_10": 0.034657815316071115, "eval_rewards/frontier_coverage_15": 0.034657815316071115, "eval_rewards/frontier_coverage_20": 0.034657815316071115, "eval_rewards/frontier_coverage_25": 0.034657815316071115, "eval_rewards/frontier_coverage_5": 0.034657815316071115, "eval_rewards/frontier_ece_reward": 0.01742413399430613, "eval_rewards/frontier_entropy_batch_reward": -0.9861111044883728, "eval_runtime": 190.0415, "eval_samples_per_second": 5.262, "eval_signal/accuracy_reward/centered_abs_mean": 0.4460177967945735, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4791330099105835, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22300889839728674, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22300889839728674, "eval_signal/advantage_abs_mean": 0.2126754273970922, "eval_signal/advantage_pre_scale_abs_mean": 0.2126754273970922, "eval_signal/advantage_pre_scale_std": 0.2553383409976959, "eval_signal/advantage_std": 0.2553383409976959, "eval_signal/brier_reward/centered_abs_mean": 0.2521931653221448, "eval_signal/brier_reward/group_bin_occupancy": 0.8506944444444445, "eval_signal/brier_reward/group_std_mean": 0.31014010310173035, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025219315973420937, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.025219315973420937, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07159827401240666, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.4791666666666666, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11320321013530095, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007159827587505181, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007159827587505181, "eval_signal/format_reward/centered_abs_mean": 0.026692708333333332, "eval_signal/format_reward/group_bin_occupancy": 0.17361111111111108, "eval_signal/format_reward/group_std_mean": 0.07258860146005948, "eval_signal/format_reward/group_zero_std_frac": 0.611111119389534, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013346354166666666, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013346354166666666, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0034897019310543933, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5729166666666666, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006318512372672558, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.362127553273846e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.362127553273846e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_0/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_0/weight": 0.10000000149011612, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_1/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_1/weight": 0.10000000149011612, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_10/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_10/weight": 0.10000000149011612, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_15/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_15/weight": 0.10000000149011612, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_20/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_20/weight": 0.10000000149011612, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_25/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_25/weight": 0.10000000149011612, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.20838888734579086, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8645833333333335, "eval_signal/frontier_coverage_5/group_std_mean": 0.3282380948464076, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_coverage_5/weight": 0.10000000149011612, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.02083888774116834, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.04939149754742781, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9027777777777777, "eval_signal/frontier_ece_reward/group_std_mean": 0.07122303297122319, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004939149754742782, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004939149754742782, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.026692708333333332, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.17361111111111108, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07258860146005948, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.611111119389534, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002669270926465591, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002669270926465591, "eval_steps_per_second": 0.032, "step": 50 }, { "calibration/aurc": 0.24374448404841295, "calibration/batch_distribution_entropy": 0.9289752783640071, "calibration/batch_entropy_100bins": 0.9246578251741718, "calibration/batch_entropy_10bins": 0.9289752783640071, "calibration/batch_entropy_50bins": 0.9365362505281345, "calibration/batch_uniqueness": 0.9475130563940756, "calibration/buffer_distribution_entropy": 0.7049394681050327, "calibration/buffer_entropy_100bins": 0.6685656833127869, "calibration/buffer_entropy_10bins": 0.7049394681050327, "calibration/buffer_entropy_50bins": 0.7214629566595602, "calibration/confidence_entropy": 0.4460111133708583, "calibration/coverage@0%": 0.01389920880632955, "calibration/coverage@1%": 0.01389920880632955, "calibration/coverage@10%": 0.04575679394564843, "calibration/coverage@15%": 0.17181752678656706, "calibration/coverage@20%": 0.5509832004253057, "calibration/coverage@25%": 0.6381154642399224, "calibration/coverage@30%": 0.7024434499796729, "calibration/coverage@5%": 0.026581449166588483, "calibration/ece": 0.1462999248914844, "calibration/mean_confidence": 0.6280517518430525, "calibration/prompt_uniqueness": 0.8650267205744318, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01692708333333335, "completions/max_length": 3483.2, "completions/max_terminated_length": 3483.2, "completions/mean_length": 713.0258666992188, "completions/mean_terminated_length": 725.3185424804688, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.13199835002062474, "grad_norm": 0.0005794555763714015, "learning_rate": 4.60843373493976e-06, "loss": -0.02, "num_tokens": 110248716.0, "reward": 0.9654149889945984, "reward_std": 0.20734579861164093, "rewards/accuracy_reward": 0.6355902910232544, "rewards/brier_reward": 0.7653455853462219, "rewards/confidence_uniqueness_reward": 0.9295515418052673, "rewards/format_reward": 0.9824652910232544, "rewards/frontier_aurc_reward": -0.0020523636834695936, "rewards/frontier_coverage_0": 0.0300428228918463, "rewards/frontier_coverage_1": 0.0300428228918463, "rewards/frontier_coverage_10": 0.0300428228918463, "rewards/frontier_coverage_15": 0.0300428228918463, "rewards/frontier_coverage_20": 0.0300428228918463, "rewards/frontier_coverage_25": 0.0300428228918463, "rewards/frontier_coverage_5": 0.0300428228918463, "rewards/frontier_ece_reward": 0.012284515798091889, "rewards/frontier_entropy_batch_reward": -0.35335326194763184, "signal/accuracy_reward/centered_abs_mean": 0.18991970717906953, "signal/accuracy_reward/group_bin_occupancy": 0.2125, "signal/accuracy_reward/group_std_mean": 0.24853154718875886, "signal/accuracy_reward/group_zero_std_frac": 0.30000001192092896, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09495985358953477, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09495985358953477, "signal/advantage_abs_mean": 0.16074307560920714, "signal/advantage_pre_scale_abs_mean": 0.16074307560920714, "signal/advantage_pre_scale_std": 0.22943655252456666, "signal/advantage_std": 0.22943655252456666, "signal/brier_reward/centered_abs_mean": 0.18310473561286927, "signal/brier_reward/group_bin_occupancy": 0.8090277777777779, "signal/brier_reward/group_std_mean": 0.23206418752670288, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01831047348678112, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01831047348678112, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045183032751083374, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7715277777777778, "signal/confidence_uniqueness_reward/group_std_mean": 0.07202807888388633, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004518303461372853, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004518303461372853, "signal/format_reward/centered_abs_mean": 0.02825520820915699, "signal/format_reward/group_bin_occupancy": 0.15104166666666669, "signal/format_reward/group_std_mean": 0.05186620131134987, "signal/format_reward/group_zero_std_frac": 0.7916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014127604104578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014127604104578495, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017425427678972483, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6809027777777777, "signal/frontier_aurc_reward/group_std_mean": 0.002860198658891022, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1781784744234757e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1781784744234757e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_0/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_0/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_1/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_1/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_1/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_10/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_10/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_10/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_15/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_15/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_15/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_20/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_20/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_20/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_25/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_25/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_25/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_5/centered_abs_mean": 0.20096865594387053, "signal/frontier_coverage_5/group_bin_occupancy": 0.804513888888889, "signal/frontier_coverage_5/group_std_mean": 0.2704664647579193, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.02009686529636383, "signal/frontier_ece_reward/centered_abs_mean": 0.038172975182533264, "signal/frontier_ece_reward/group_bin_occupancy": 0.83125, "signal/frontier_ece_reward/group_std_mean": 0.055047205090522765, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038172977045178415, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038172977045178415, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36611982583999636, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7548611111111112, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43662421107292176, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036611984670162204, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036611984670162204, "step": 55 }, { "calibration/aurc": 0.286214721782296, "calibration/batch_distribution_entropy": 0.9343979772512739, "calibration/batch_entropy_100bins": 0.9207935974888588, "calibration/batch_entropy_10bins": 0.9343979772512739, "calibration/batch_entropy_50bins": 0.9404285160285129, "calibration/batch_uniqueness": 0.9529432818966646, "calibration/buffer_distribution_entropy": 0.741460205758367, "calibration/buffer_entropy_100bins": 0.7172425168033953, "calibration/buffer_entropy_10bins": 0.741460205758367, "calibration/buffer_entropy_50bins": 0.7623217046840285, "calibration/confidence_entropy": 0.4786053268474739, "calibration/coverage@0%": 0.006835482317912184, "calibration/coverage@1%": 0.006835482317912184, "calibration/coverage@10%": 0.08199339007752038, "calibration/coverage@15%": 0.3267279418616805, "calibration/coverage@20%": 0.3989069448826226, "calibration/coverage@25%": 0.4664435090943801, "calibration/coverage@30%": 0.7006002191928997, "calibration/coverage@5%": 0.006835482317912184, "calibration/ece": 0.16097266382692185, "calibration/mean_confidence": 0.6059373040768069, "calibration/prompt_uniqueness": 0.8695281357757139, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333326, "completions/max_length": 3914.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 709.5424438476563, "completions/mean_terminated_length": 717.8249145507813, "completions/min_length": 0.0, "completions/min_terminated_length": 187.2, "epoch": 0.14399820002249972, "grad_norm": 0.00047888929839245975, "learning_rate": 4.457831325301205e-06, "loss": -0.0162, "num_tokens": 121519221.0, "reward": 0.9763705134391785, "reward_std": 0.196681210398674, "rewards/accuracy_reward": 0.615711796283722, "rewards/brier_reward": 0.7821933507919312, "rewards/confidence_uniqueness_reward": 0.9381864786148071, "rewards/format_reward": 0.9881944537162781, "rewards/frontier_aurc_reward": -0.001919442624785006, "rewards/frontier_coverage_0": 0.06152722500264644, "rewards/frontier_coverage_1": 0.06152722500264644, "rewards/frontier_coverage_10": 0.06152722500264644, "rewards/frontier_coverage_15": 0.06152722500264644, "rewards/frontier_coverage_20": 0.06152722500264644, "rewards/frontier_coverage_25": 0.06152722500264644, "rewards/frontier_coverage_5": 0.06152722500264644, "rewards/frontier_ece_reward": 0.011796734295785427, "rewards/frontier_entropy_batch_reward": -0.41845354437828064, "signal/accuracy_reward/centered_abs_mean": 0.2031087249517441, "signal/accuracy_reward/group_bin_occupancy": 0.21458333333333335, "signal/accuracy_reward/group_std_mean": 0.2622006803750992, "signal/accuracy_reward/group_zero_std_frac": 0.28333333432674407, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10155436247587205, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10155436247587205, "signal/advantage_abs_mean": 0.15023626685142516, "signal/advantage_pre_scale_abs_mean": 0.15023626685142516, "signal/advantage_pre_scale_std": 0.21854868531227112, "signal/advantage_std": 0.21854868531227112, "signal/brier_reward/centered_abs_mean": 0.16674597859382628, "signal/brier_reward/group_bin_occupancy": 0.8020833333333334, "signal/brier_reward/group_std_mean": 0.21431083381175994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016674598678946495, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016674598678946495, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038017303496599195, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7871527777777778, "signal/confidence_uniqueness_reward/group_std_mean": 0.0630945160984993, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038017303217202426, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038017303217202426, "signal/format_reward/centered_abs_mean": 0.02126736082136631, "signal/format_reward/group_bin_occupancy": 0.14895833333333333, "signal/format_reward/group_std_mean": 0.043730095773935315, "signal/format_reward/group_zero_std_frac": 0.8083333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010633680410683155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010633680410683155, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013663872377946973, "signal/frontier_aurc_reward/group_bin_occupancy": 0.704861111111111, "signal/frontier_aurc_reward/group_std_mean": 0.002126425364986062, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7079840290534776e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7079840290534776e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_0/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_0/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_1/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_1/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_1/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_10/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_10/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_10/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_15/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_15/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_15/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_20/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_20/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_20/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_25/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_25/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_25/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_5/centered_abs_mean": 0.19963845312595369, "signal/frontier_coverage_5/group_bin_occupancy": 0.8038194444444444, "signal/frontier_coverage_5/group_std_mean": 0.27037686109542847, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.019963844493031502, "signal/frontier_ece_reward/centered_abs_mean": 0.03020486868917942, "signal/frontier_ece_reward/group_bin_occupancy": 0.8215277777777776, "signal/frontier_ece_reward/group_std_mean": 0.04409870654344559, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030204871203750373, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030204871203750373, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3919844150543213, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7600694444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4584620654582977, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039198441058397294, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039198441058397294, "step": 60 }, { "calibration/aurc": 0.1940140969079807, "calibration/batch_distribution_entropy": 0.8938569295594766, "calibration/batch_entropy_100bins": 0.9033844517682443, "calibration/batch_entropy_10bins": 0.8938569295594766, "calibration/batch_entropy_50bins": 0.915012654661888, "calibration/batch_uniqueness": 0.943383837453504, "calibration/buffer_distribution_entropy": 0.7683503092367889, "calibration/buffer_entropy_100bins": 0.7539086742860022, "calibration/buffer_entropy_10bins": 0.7683503092367889, "calibration/buffer_entropy_50bins": 0.793019939892889, "calibration/confidence_entropy": 0.40771152928483917, "calibration/coverage@0%": 0.016897250780865843, "calibration/coverage@1%": 0.016897250780865843, "calibration/coverage@10%": 0.3848949748426148, "calibration/coverage@15%": 0.5550847520854665, "calibration/coverage@20%": 0.6380853427409805, "calibration/coverage@25%": 0.6896129223874049, "calibration/coverage@30%": 0.7316552557390255, "calibration/coverage@5%": 0.15476867787235124, "calibration/ece": 0.1408602524415273, "calibration/mean_confidence": 0.607529479335007, "calibration/prompt_uniqueness": 0.8356702266069405, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333325, "completions/max_length": 3560.6, "completions/max_terminated_length": 3560.6, "completions/mean_length": 672.5094848632813, "completions/mean_terminated_length": 678.7419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 157.8, "epoch": 0.1559980500243747, "grad_norm": 0.0007051236461848021, "learning_rate": 4.307228915662651e-06, "loss": -0.0121, "num_tokens": 132360578.0, "reward": 0.9930898666381835, "reward_std": 0.20509625375270843, "rewards/accuracy_reward": 0.6331597208976746, "rewards/brier_reward": 0.7913673639297485, "rewards/confidence_uniqueness_reward": 0.9348729729652405, "rewards/format_reward": 0.9905381917953491, "rewards/frontier_aurc_reward": -0.0016889730701223015, "rewards/frontier_coverage_0": 0.07102360390126705, "rewards/frontier_coverage_1": 0.07102360390126705, "rewards/frontier_coverage_10": 0.07102360390126705, "rewards/frontier_coverage_15": 0.07102360390126705, "rewards/frontier_coverage_20": 0.07102360390126705, "rewards/frontier_coverage_25": 0.07102360390126705, "rewards/frontier_coverage_5": 0.07102360390126705, "rewards/frontier_ece_reward": 0.013031562231481076, "rewards/frontier_entropy_batch_reward": -0.4238172650337219, "signal/accuracy_reward/centered_abs_mean": 0.18643662929534913, "signal/accuracy_reward/group_bin_occupancy": 0.21354166666666669, "signal/accuracy_reward/group_std_mean": 0.24793701171875, "signal/accuracy_reward/group_zero_std_frac": 0.2916666716337204, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09321831464767456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09321831464767456, "signal/advantage_abs_mean": 0.15872004330158235, "signal/advantage_pre_scale_abs_mean": 0.15872004330158235, "signal/advantage_pre_scale_std": 0.22737123370170592, "signal/advantage_std": 0.22737123370170592, "signal/brier_reward/centered_abs_mean": 0.17829251885414124, "signal/brier_reward/group_bin_occupancy": 0.7753472222222222, "signal/brier_reward/group_std_mean": 0.22675358057022094, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017829251661896705, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017829251661896705, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03666983284056187, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8017361111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.0584432914853096, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036669834051281215, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036669834051281215, "signal/format_reward/centered_abs_mean": 0.01703016497194767, "signal/format_reward/group_bin_occupancy": 0.14479166666666668, "signal/format_reward/group_std_mean": 0.035620180889964104, "signal/format_reward/group_zero_std_frac": 0.8416666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008515082485973834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008515082485973834, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013369303196668625, "signal/frontier_aurc_reward/group_bin_occupancy": 0.696875, "signal/frontier_aurc_reward/group_std_mean": 0.002153940638527274, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.671162899583578e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.671162899583578e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_0/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_0/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_1/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_1/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_1/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_10/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_10/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_10/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_15/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_15/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_15/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_20/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_20/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_20/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_25/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_25/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_25/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_5/centered_abs_mean": 0.22538237869739533, "signal/frontier_coverage_5/group_bin_occupancy": 0.7586805555555556, "signal/frontier_coverage_5/group_std_mean": 0.3036982655525208, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.022538238018751145, "signal/frontier_ece_reward/centered_abs_mean": 0.030462851002812385, "signal/frontier_ece_reward/group_bin_occupancy": 0.8322916666666667, "signal/frontier_ece_reward/group_std_mean": 0.0436931237578392, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030462852213531733, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030462852213531733, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3919891953468323, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7576388888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4605256378650665, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039198920130729675, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039198920130729675, "step": 65 }, { "calibration/aurc": 0.27194618970235607, "calibration/batch_distribution_entropy": 0.8945873610755051, "calibration/batch_entropy_100bins": 0.9131396367014031, "calibration/batch_entropy_10bins": 0.8945873610755051, "calibration/batch_entropy_50bins": 0.9192407527206786, "calibration/batch_uniqueness": 0.9371901885401208, "calibration/buffer_distribution_entropy": 0.7877129357086162, "calibration/buffer_entropy_100bins": 0.78102191001134, "calibration/buffer_entropy_10bins": 0.7877129357086162, "calibration/buffer_entropy_50bins": 0.8151882911475585, "calibration/confidence_entropy": 0.39241385084998825, "calibration/coverage@0%": 0.05326375040260959, "calibration/coverage@1%": 0.05639690967153911, "calibration/coverage@10%": 0.12010448147310568, "calibration/coverage@15%": 0.14781648540668707, "calibration/coverage@20%": 0.25953707177758134, "calibration/coverage@25%": 0.3467758145563071, "calibration/coverage@30%": 0.69023526244398, "calibration/coverage@5%": 0.11070500366631716, "calibration/ece": 0.14855692656439354, "calibration/mean_confidence": 0.592146170413358, "calibration/prompt_uniqueness": 0.8288111311405574, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007118055555555558, "completions/max_length": 3017.8, "completions/max_terminated_length": 3017.8, "completions/mean_length": 649.9505249023438, "completions/mean_terminated_length": 654.6026123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 146.8, "epoch": 0.16799790002624967, "grad_norm": 0.000531592988409102, "learning_rate": 4.156626506024097e-06, "loss": -0.0119, "num_tokens": 142926152.0, "reward": 0.9971241235733033, "reward_std": 0.1946073591709137, "rewards/accuracy_reward": 0.6058159708976746, "rewards/brier_reward": 0.7913841605186462, "rewards/confidence_uniqueness_reward": 0.9257380962371826, "rewards/format_reward": 0.9927083492279053, "rewards/frontier_aurc_reward": -0.0018725383095443248, "rewards/frontier_coverage_0": 0.09908072724938392, "rewards/frontier_coverage_1": 0.09908072724938392, "rewards/frontier_coverage_10": 0.09908072724938392, "rewards/frontier_coverage_15": 0.09908072724938392, "rewards/frontier_coverage_20": 0.09908072724938392, "rewards/frontier_coverage_25": 0.09908072724938392, "rewards/frontier_coverage_5": 0.09908072724938392, "rewards/frontier_ece_reward": 0.014820458181202412, "rewards/frontier_entropy_batch_reward": -0.4466545760631561, "signal/accuracy_reward/centered_abs_mean": 0.18914388120174408, "signal/accuracy_reward/group_bin_occupancy": 0.21423611111111113, "signal/accuracy_reward/group_std_mean": 0.2506607919931412, "signal/accuracy_reward/group_zero_std_frac": 0.28611111342906953, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09457194060087204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09457194060087204, "signal/advantage_abs_mean": 0.14794844686985015, "signal/advantage_pre_scale_abs_mean": 0.14794844686985015, "signal/advantage_pre_scale_std": 0.2173856317996979, "signal/advantage_std": 0.2173856317996979, "signal/brier_reward/centered_abs_mean": 0.1662675827741623, "signal/brier_reward/group_bin_occupancy": 0.7461805555555555, "signal/brier_reward/group_std_mean": 0.21699636280536652, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01662675738334656, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01662675738334656, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03762320056557655, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8291666666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.05675676092505455, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037623200565576552, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037623200565576552, "signal/format_reward/centered_abs_mean": 0.01309678815305233, "signal/format_reward/group_bin_occupancy": 0.14166666666666666, "signal/format_reward/group_std_mean": 0.028634771704673767, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006548394076526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006548394076526165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015188657911494375, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6951388888888889, "signal/frontier_aurc_reward/group_std_mean": 0.002341068908572197, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8985822680406273e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8985822680406273e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_0/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_0/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_1/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_1/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_1/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_10/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_10/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_10/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_15/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_15/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_15/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_20/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_20/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_20/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_25/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_25/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_25/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_5/centered_abs_mean": 0.2185291677713394, "signal/frontier_coverage_5/group_bin_occupancy": 0.7368055555555557, "signal/frontier_coverage_5/group_std_mean": 0.2952149331569672, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.021852916106581688, "signal/frontier_ece_reward/centered_abs_mean": 0.030513783916831017, "signal/frontier_ece_reward/group_bin_occupancy": 0.8253472222222221, "signal/frontier_ece_reward/group_std_mean": 0.04309252202510834, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030513783451169727, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030513783451169727, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39379770755767823, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7395833333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46024208068847655, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03937977254390716, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03937977254390716, "step": 70 }, { "calibration/aurc": 0.19853769462627072, "calibration/batch_distribution_entropy": 0.8961173951478291, "calibration/batch_entropy_100bins": 0.912091298899328, "calibration/batch_entropy_10bins": 0.8961173951478291, "calibration/batch_entropy_50bins": 0.9179681022942207, "calibration/batch_uniqueness": 0.9348791493897538, "calibration/buffer_distribution_entropy": 0.8003268741179234, "calibration/buffer_entropy_100bins": 0.80209686504773, "calibration/buffer_entropy_10bins": 0.8003268741179234, "calibration/buffer_entropy_50bins": 0.8315238709106142, "calibration/confidence_entropy": 0.4322237036978451, "calibration/coverage@0%": 0.04036413590679416, "calibration/coverage@1%": 0.04036413590679416, "calibration/coverage@10%": 0.376274421695374, "calibration/coverage@15%": 0.4921271605087534, "calibration/coverage@20%": 0.6069650945898191, "calibration/coverage@25%": 0.6777106476805124, "calibration/coverage@30%": 0.7231799896952824, "calibration/coverage@5%": 0.22371359918982808, "calibration/ece": 0.09717652729180778, "calibration/mean_confidence": 0.6359411054528509, "calibration/prompt_uniqueness": 0.8333672983024417, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005295138888888884, "completions/max_length": 3205.2, "completions/max_terminated_length": 3205.2, "completions/mean_length": 677.5071166992187, "completions/mean_terminated_length": 681.2035278320312, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.17999775002812465, "grad_norm": 0.0005247980006970465, "learning_rate": 4.006024096385543e-06, "loss": -0.0067, "num_tokens": 153795930.0, "reward": 1.0187444925308227, "reward_std": 0.17705624103546141, "rewards/accuracy_reward": 0.6374131917953492, "rewards/brier_reward": 0.8208310961723327, "rewards/confidence_uniqueness_reward": 0.929869544506073, "rewards/format_reward": 0.9943576455116272, "rewards/frontier_aurc_reward": -0.001502724504098296, "rewards/frontier_coverage_0": 0.09719437658786774, "rewards/frontier_coverage_1": 0.09719437658786774, "rewards/frontier_coverage_10": 0.09719437658786774, "rewards/frontier_coverage_15": 0.09719437658786774, "rewards/frontier_coverage_20": 0.09719437658786774, "rewards/frontier_coverage_25": 0.09719437658786774, "rewards/frontier_coverage_5": 0.09719437658786774, "rewards/frontier_ece_reward": 0.014616208896040917, "rewards/frontier_entropy_batch_reward": -0.4168988406658173, "signal/accuracy_reward/centered_abs_mean": 0.2002549946308136, "signal/accuracy_reward/group_bin_occupancy": 0.21215277777777777, "signal/accuracy_reward/group_std_mean": 0.2571962982416153, "signal/accuracy_reward/group_zero_std_frac": 0.3027777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1001274973154068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1001274973154068, "signal/advantage_abs_mean": 0.132818341255188, "signal/advantage_pre_scale_abs_mean": 0.132818341255188, "signal/advantage_pre_scale_std": 0.19871813356876372, "signal/advantage_std": 0.19871813356876372, "signal/brier_reward/centered_abs_mean": 0.14455785602331161, "signal/brier_reward/group_bin_occupancy": 0.7569444444444444, "signal/brier_reward/group_std_mean": 0.1915825366973877, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014455785788595677, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014455785788595677, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03241968899965286, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8350694444444444, "signal/confidence_uniqueness_reward/group_std_mean": 0.05105073526501656, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003241968993097544, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003241968993097544, "signal/format_reward/centered_abs_mean": 0.010378689225763082, "signal/format_reward/group_bin_occupancy": 0.140625, "signal/format_reward/group_std_mean": 0.02502230368554592, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005189344612881541, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005189344612881541, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012487402884289623, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7135416666666666, "signal/frontier_aurc_reward/group_std_mean": 0.0019265936687588691, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.56092533870833e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.56092533870833e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_0/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_0/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_1/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_1/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_1/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_10/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_10/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_10/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_15/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_15/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_15/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_20/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_20/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_20/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_25/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_25/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_25/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_5/centered_abs_mean": 0.21457959413528443, "signal/frontier_coverage_5/group_bin_occupancy": 0.751736111111111, "signal/frontier_coverage_5/group_std_mean": 0.29035847187042235, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.021457960084080695, "signal/frontier_ece_reward/centered_abs_mean": 0.028026602417230605, "signal/frontier_ece_reward/group_bin_occupancy": 0.7993055555555555, "signal/frontier_ece_reward/group_std_mean": 0.040372003614902494, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028026602696627377, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028026602696627377, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38128851652145385, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7371527777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4472618103027344, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03812885135412216, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03812885135412216, "step": 75 }, { "calibration/aurc": 0.21202390893452958, "calibration/batch_distribution_entropy": 0.892489770208589, "calibration/batch_entropy_100bins": 0.9105575405217194, "calibration/batch_entropy_10bins": 0.892489770208589, "calibration/batch_entropy_50bins": 0.9111948565750734, "calibration/batch_uniqueness": 0.9341327315004448, "calibration/buffer_distribution_entropy": 0.8131000388841617, "calibration/buffer_entropy_100bins": 0.8196942522115224, "calibration/buffer_entropy_10bins": 0.8131000388841617, "calibration/buffer_entropy_50bins": 0.8455434265915864, "calibration/confidence_entropy": 0.42654255632151405, "calibration/coverage@0%": 0.014085920925357768, "calibration/coverage@1%": 0.014085920925357768, "calibration/coverage@10%": 0.2734964364699554, "calibration/coverage@15%": 0.32363363627234387, "calibration/coverage@20%": 0.5826203423632129, "calibration/coverage@25%": 0.7059773098189186, "calibration/coverage@30%": 0.793550096136303, "calibration/coverage@5%": 0.11101395617339956, "calibration/ece": 0.13445512312765592, "calibration/mean_confidence": 0.606925372178709, "calibration/prompt_uniqueness": 0.8242422611106097, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005034722222222232, "completions/max_length": 3563.0, "completions/max_terminated_length": 3563.0, "completions/mean_length": 714.0626708984375, "completions/mean_terminated_length": 717.676416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.2, "epoch": 0.19199760002999963, "grad_norm": 0.0004452217253856361, "learning_rate": 3.855421686746989e-06, "loss": -0.0068, "num_tokens": 165075212.0, "reward": 1.0306865215301513, "reward_std": 0.18099702000617982, "rewards/accuracy_reward": 0.5858506977558136, "rewards/brier_reward": 0.8206128120422364, "rewards/confidence_uniqueness_reward": 0.9320234894752503, "rewards/format_reward": 0.99453125, "rewards/frontier_aurc_reward": -0.0016457670368254184, "rewards/frontier_coverage_0": 0.14450157731771468, "rewards/frontier_coverage_1": 0.14450157731771468, "rewards/frontier_coverage_10": 0.14450157731771468, "rewards/frontier_coverage_15": 0.14450157731771468, "rewards/frontier_coverage_20": 0.14450157731771468, "rewards/frontier_coverage_25": 0.14450157731771468, "rewards/frontier_coverage_5": 0.14450157731771468, "rewards/frontier_ece_reward": 0.012375526875257493, "rewards/frontier_entropy_batch_reward": -0.37136178016662597, "signal/accuracy_reward/centered_abs_mean": 0.21248372495174409, "signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223, "signal/accuracy_reward/group_std_mean": 0.2697928935289383, "signal/accuracy_reward/group_zero_std_frac": 0.272222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10624186247587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10624186247587204, "signal/advantage_abs_mean": 0.13688016831874847, "signal/advantage_pre_scale_abs_mean": 0.13688016831874847, "signal/advantage_pre_scale_std": 0.19873642921447754, "signal/advantage_std": 0.19873642921447754, "signal/brier_reward/centered_abs_mean": 0.1460920125246048, "signal/brier_reward/group_bin_occupancy": 0.7590277777777777, "signal/brier_reward/group_std_mean": 0.19507495164871216, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01460920162498951, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01460920162498951, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03345326967537403, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8486111111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.04838352724909782, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003345327032729983, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003345327032729983, "signal/format_reward/centered_abs_mean": 0.009836154337972403, "signal/format_reward/group_bin_occupancy": 0.13541666666666669, "signal/format_reward/group_std_mean": 0.019612624868750574, "signal/format_reward/group_zero_std_frac": 0.9166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004918077168986202, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004918077168986202, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013075984083116055, "signal/frontier_aurc_reward/group_bin_occupancy": 0.704861111111111, "signal/frontier_aurc_reward/group_std_mean": 0.002080147247761488, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6344982032023837e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6344982032023837e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_0/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_0/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_1/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_1/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_1/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_10/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_10/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_10/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_15/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_15/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_15/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_20/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_20/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_20/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_25/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_25/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_25/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_5/centered_abs_mean": 0.2356353372335434, "signal/frontier_coverage_5/group_bin_occupancy": 0.7520833333333333, "signal/frontier_coverage_5/group_std_mean": 0.3123137831687927, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.023563534021377563, "signal/frontier_ece_reward/centered_abs_mean": 0.025293727964162828, "signal/frontier_ece_reward/group_bin_occupancy": 0.809375, "signal/frontier_ece_reward/group_std_mean": 0.036943011730909345, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002529372926801443, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002529372926801443, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3852746546268463, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4537887334823608, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0385274663567543, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0385274663567543, "step": 80 }, { "calibration/aurc": 0.19887768920185828, "calibration/batch_distribution_entropy": 0.8582793325801639, "calibration/batch_entropy_100bins": 0.874851770121175, "calibration/batch_entropy_10bins": 0.8582793325801639, "calibration/batch_entropy_50bins": 0.8765912987534377, "calibration/batch_uniqueness": 0.8977951813469284, "calibration/buffer_distribution_entropy": 0.8264599677058859, "calibration/buffer_entropy_100bins": 0.8342157208556948, "calibration/buffer_entropy_10bins": 0.8264599677058859, "calibration/buffer_entropy_50bins": 0.8574377668441269, "calibration/confidence_entropy": 0.3511930719190495, "calibration/coverage@0%": 0.04928223038710476, "calibration/coverage@1%": 0.04928223038710476, "calibration/coverage@10%": 0.3719955622775556, "calibration/coverage@15%": 0.5484676920756779, "calibration/coverage@20%": 0.6220975910365909, "calibration/coverage@25%": 0.6842214398364115, "calibration/coverage@30%": 0.7458859138684379, "calibration/coverage@5%": 0.18465859511343805, "calibration/ece": 0.08830501621552686, "calibration/mean_confidence": 0.4861689044034094, "calibration/prompt_uniqueness": 0.7554502920014838, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003385416666666674, "completions/max_length": 3001.6, "completions/max_terminated_length": 3001.6, "completions/mean_length": 723.7444458007812, "completions/mean_terminated_length": 726.2084716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.2, "epoch": 0.2039974500318746, "grad_norm": 0.000452884822152555, "learning_rate": 3.7048192771084342e-06, "loss": -0.0038, "num_tokens": 176499948.0, "reward": 1.0673803091049194, "reward_std": 0.16003829836845399, "rewards/accuracy_reward": 0.57890625, "rewards/brier_reward": 0.8611330986022949, "rewards/confidence_uniqueness_reward": 0.9070895791053772, "rewards/format_reward": 0.9963541626930237, "rewards/frontier_aurc_reward": -0.0014048191718757153, "rewards/frontier_coverage_0": 0.20752938687801362, "rewards/frontier_coverage_1": 0.20752938687801362, "rewards/frontier_coverage_10": 0.20752938687801362, "rewards/frontier_coverage_15": 0.20752938687801362, "rewards/frontier_coverage_20": 0.20752938687801362, "rewards/frontier_coverage_25": 0.20752938687801362, "rewards/frontier_coverage_5": 0.20752938687801362, "rewards/frontier_ece_reward": 0.016395201347768305, "rewards/frontier_entropy_batch_reward": -0.4396472811698914, "signal/accuracy_reward/centered_abs_mean": 0.1973470091819763, "signal/accuracy_reward/group_bin_occupancy": 0.21805555555555559, "signal/accuracy_reward/group_std_mean": 0.2611843168735504, "signal/accuracy_reward/group_zero_std_frac": 0.2555555611848831, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09867350459098816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09867350459098816, "signal/advantage_abs_mean": 0.11756447702646255, "signal/advantage_pre_scale_abs_mean": 0.11756447702646255, "signal/advantage_pre_scale_std": 0.1783807784318924, "signal/advantage_std": 0.1783807784318924, "signal/brier_reward/centered_abs_mean": 0.1257694497704506, "signal/brier_reward/group_bin_occupancy": 0.7149305555555556, "signal/brier_reward/group_std_mean": 0.17371391355991364, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012576944567263127, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012576944567263127, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.053402946889400484, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7479166666666667, "signal/confidence_uniqueness_reward/group_std_mean": 0.07172206267714501, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005340294633060694, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005340294633060694, "signal/format_reward/centered_abs_mean": 0.006532118155155331, "signal/format_reward/group_bin_occupancy": 0.134375, "signal/format_reward/group_std_mean": 0.015146102197468281, "signal/format_reward/group_zero_std_frac": 0.925, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0032660590775776656, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032660590775776656, "signal/frontier_aurc_reward/centered_abs_mean": 0.001343308249488473, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6805555555555556, "signal/frontier_aurc_reward/group_std_mean": 0.0021895582554861902, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.679135348240379e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.679135348240379e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_0/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_0/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_1/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_1/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_1/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_10/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_10/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_10/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_15/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_15/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_15/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_20/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_20/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_20/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_25/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_25/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_25/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_5/centered_abs_mean": 0.22067134380340575, "signal/frontier_coverage_5/group_bin_occupancy": 0.6989583333333333, "signal/frontier_coverage_5/group_std_mean": 0.2975525438785553, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.02206713445484638, "signal/frontier_ece_reward/centered_abs_mean": 0.026366091147065163, "signal/frontier_ece_reward/group_bin_occupancy": 0.7888888888888889, "signal/frontier_ece_reward/group_std_mean": 0.03810814470052719, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026366091333329678, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026366091333329678, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38300331830978396, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7260416666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4546321153640747, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00555555559694767, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03830033168196678, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03830033168196678, "step": 85 }, { "calibration/aurc": 0.5399305014070592, "calibration/batch_distribution_entropy": 0.49741838830536295, "calibration/batch_entropy_100bins": 0.6470591660026435, "calibration/batch_entropy_10bins": 0.49741838830536295, "calibration/batch_entropy_50bins": 0.6181749096895081, "calibration/batch_uniqueness": 0.5907532200896098, "calibration/buffer_distribution_entropy": 0.8363355079750854, "calibration/buffer_entropy_100bins": 0.8450362323859538, "calibration/buffer_entropy_10bins": 0.8363355079750854, "calibration/buffer_entropy_50bins": 0.8657301026926824, "calibration/confidence_entropy": 0.20564557254488328, "calibration/coverage@0%": 0.05212015712053617, "calibration/coverage@1%": 0.10264099045386951, "calibration/coverage@10%": 0.2891577985478643, "calibration/coverage@15%": 0.3256461134468179, "calibration/coverage@20%": 0.35065699247205717, "calibration/coverage@25%": 0.376712279335677, "calibration/coverage@30%": 0.3907883995326303, "calibration/coverage@5%": 0.15422116886988343, "calibration/ece": 0.05554478321511012, "calibration/mean_confidence": 0.27574555197338846, "calibration/prompt_uniqueness": 0.46331776524020124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0015625000000000222, "completions/max_length": 2791.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 710.73837890625, "completions/mean_terminated_length": 711.8608520507812, "completions/min_length": 42.0, "completions/min_terminated_length": 190.8, "epoch": 0.2159973000337496, "grad_norm": 0.00019236108346376568, "learning_rate": 3.5542168674698798e-06, "loss": -0.0031, "num_tokens": 187756326.0, "reward": 1.1478140115737916, "reward_std": 0.10408884063363075, "rewards/accuracy_reward": 0.24027777976589276, "rewards/brier_reward": 0.9400920748710633, "rewards/confidence_uniqueness_reward": 0.58316290974617, "rewards/format_reward": 0.9970486044883728, "rewards/frontier_aurc_reward": -0.0016796960728242994, "rewards/frontier_coverage_0": 0.6316808700561524, "rewards/frontier_coverage_1": 0.6316808700561524, "rewards/frontier_coverage_10": 0.6316808700561524, "rewards/frontier_coverage_15": 0.6316808700561524, "rewards/frontier_coverage_20": 0.6316808700561524, "rewards/frontier_coverage_25": 0.6316808700561524, "rewards/frontier_coverage_5": 0.6316808700561524, "rewards/frontier_ece_reward": 0.014462851732969285, "rewards/frontier_entropy_batch_reward": -0.6677661180496216, "signal/accuracy_reward/centered_abs_mean": 0.12567274437751622, "signal/accuracy_reward/group_bin_occupancy": 0.1840277777777778, "signal/accuracy_reward/group_std_mean": 0.16602826602756976, "signal/accuracy_reward/group_zero_std_frac": 0.5277777761220932, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06283637218875811, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06283637218875811, "signal/advantage_abs_mean": 0.0688327431678772, "signal/advantage_pre_scale_abs_mean": 0.0688327431678772, "signal/advantage_pre_scale_std": 0.12689779996871947, "signal/advantage_std": 0.12689779996871947, "signal/brier_reward/centered_abs_mean": 0.06333923451602459, "signal/brier_reward/group_bin_occupancy": 0.5947916666666667, "signal/brier_reward/group_std_mean": 0.10030964910984039, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006333923456259072, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.006333923456259072, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2524725556373596, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.5340277777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.28764230757951736, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025247253943234682, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025247253943234682, "signal/format_reward/centered_abs_mean": 0.005674913222901523, "signal/format_reward/group_bin_occupancy": 0.13541666666666666, "signal/format_reward/group_std_mean": 0.015499813482165337, "signal/format_reward/group_zero_std_frac": 0.9166666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0028374566114507615, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0028374566114507615, "signal/frontier_aurc_reward/centered_abs_mean": 0.0008010723817278631, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6520833333333333, "signal/frontier_aurc_reward/group_std_mean": 0.0013616787298815324, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.0013404175879258e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.0013404175879258e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_0/group_bin_occupancy": 0.690625, "signal/frontier_coverage_0/group_std_mean": 0.236984321475029, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_1/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_1/group_bin_occupancy": 0.690625, "signal/frontier_coverage_1/group_std_mean": 0.236984321475029, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_10/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_10/group_bin_occupancy": 0.690625, "signal/frontier_coverage_10/group_std_mean": 0.236984321475029, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_15/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_15/group_bin_occupancy": 0.690625, "signal/frontier_coverage_15/group_std_mean": 0.236984321475029, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_20/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_20/group_bin_occupancy": 0.690625, "signal/frontier_coverage_20/group_std_mean": 0.236984321475029, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_25/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_25/group_bin_occupancy": 0.690625, "signal/frontier_coverage_25/group_std_mean": 0.236984321475029, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_5/centered_abs_mean": 0.1786520630121231, "signal/frontier_coverage_5/group_bin_occupancy": 0.690625, "signal/frontier_coverage_5/group_std_mean": 0.236984321475029, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.01786520555615425, "signal/frontier_ece_reward/centered_abs_mean": 0.016731801349669694, "signal/frontier_ece_reward/group_bin_occupancy": 0.7232638888888889, "signal/frontier_ece_reward/group_std_mean": 0.02399433497339487, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016731801675632595, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016731801675632595, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31356381475925443, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.523611111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4009668231010437, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08333333171904087, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031356383487582205, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031356383487582205, "step": 90 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.03994664725036534, "calibration/batch_entropy_100bins": 0.4099580796026087, "calibration/batch_entropy_10bins": 0.03994664725036534, "calibration/batch_entropy_50bins": 0.32650286573394777, "calibration/batch_uniqueness": 0.272314453125, "calibration/buffer_distribution_entropy": 0.8455020693172232, "calibration/buffer_entropy_100bins": 0.848574391143036, "calibration/buffer_entropy_10bins": 0.8455020693172232, "calibration/buffer_entropy_50bins": 0.8680614704922407, "calibration/confidence_entropy": 0.0955832500869643, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.024320078501806765, "calibration/mean_confidence": 0.024320078501806765, "calibration/prompt_uniqueness": 0.18600260416666667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 2236.4, "completions/max_terminated_length": 2236.4, "completions/mean_length": 714.6057373046875, "completions/mean_terminated_length": 715.1013305664062, "completions/min_length": 33.4, "completions/min_terminated_length": 164.4, "epoch": 0.22799715003562457, "grad_norm": 9.691954619484022e-05, "learning_rate": 3.4036144578313257e-06, "loss": -0.0009, "num_tokens": 199080264.0, "reward": 1.1923882484436035, "reward_std": 0.034006135910749434, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9971752405166626, "rewards/confidence_uniqueness_reward": 0.21403581481426953, "rewards/format_reward": 0.9986979126930237, "rewards/frontier_aurc_reward": -0.0019425456179305912, "rewards/frontier_coverage_0": 0.9546570420265198, "rewards/frontier_coverage_1": 0.9546570420265198, "rewards/frontier_coverage_10": 0.9546570420265198, "rewards/frontier_coverage_15": 0.9546570420265198, "rewards/frontier_coverage_20": 0.9546570420265198, "rewards/frontier_coverage_25": 0.9546570420265198, "rewards/frontier_coverage_5": 0.9546570420265198, "rewards/frontier_ece_reward": 0.005286368634551763, "rewards/frontier_entropy_batch_reward": -0.9684616804122925, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.02305524256080389, "signal/advantage_pre_scale_abs_mean": 0.02305524256080389, "signal/advantage_pre_scale_std": 0.048858624696731565, "signal/advantage_std": 0.048858624696731565, "signal/brier_reward/centered_abs_mean": 0.004363900749012828, "signal/brier_reward/group_bin_occupancy": 0.6322916666666666, "signal/brier_reward/group_std_mean": 0.01022816812619567, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00043639009818434714, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00043639009818434714, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.36309358179569245, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.60625, "signal/confidence_uniqueness_reward/group_std_mean": 0.4173290342092514, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0363093588501215, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0363093588501215, "signal/format_reward/centered_abs_mean": 0.002511935762595385, "signal/format_reward/group_bin_occupancy": 0.12986111111111112, "signal/format_reward/group_std_mean": 0.007066754624247551, "signal/format_reward/group_zero_std_frac": 0.9611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012559678812976926, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012559678812976926, "signal/frontier_aurc_reward/centered_abs_mean": 3.0336726194946095e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8013888888888889, "signal/frontier_aurc_reward/group_std_mean": 4.735183538286947e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.792090922161151e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.792090922161151e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_0/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_0/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_1/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_1/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_1/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_10/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_10/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_10/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_15/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_15/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_15/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_20/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_20/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_20/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_25/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_25/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_25/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_5/centered_abs_mean": 0.04291480258107185, "signal/frontier_coverage_5/group_bin_occupancy": 0.8003472222222221, "signal/frontier_coverage_5/group_std_mean": 0.05929781645536423, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004291480267420411, "signal/frontier_ece_reward/centered_abs_mean": 0.004431074485182762, "signal/frontier_ece_reward/group_bin_occupancy": 0.6114583333333334, "signal/frontier_ece_reward/group_std_mean": 0.006472232099622488, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004431074601598084, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004431074601598084, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05818961188197136, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.221875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.12989502102136613, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4555555522441864, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005818961281329393, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005818961281329393, "step": 95 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.03401586624223902, "calibration/batch_entropy_100bins": 0.44183909396831, "calibration/batch_entropy_10bins": 0.03401586624223902, "calibration/batch_entropy_50bins": 0.3508888331700525, "calibration/batch_uniqueness": 0.4576632450931252, "calibration/buffer_distribution_entropy": 0.845870082499727, "calibration/buffer_entropy_100bins": 0.8497800340010169, "calibration/buffer_entropy_10bins": 0.845870082499727, "calibration/buffer_entropy_50bins": 0.8667770466827367, "calibration/confidence_entropy": 0.11965276406860055, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.029416174452166655, "calibration/mean_confidence": 0.02941617445216666, "calibration/prompt_uniqueness": 0.40579098606555386, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0016493055555555358, "completions/max_length": 2943.8, "completions/max_terminated_length": 2943.8, "completions/mean_length": 714.0312377929688, "completions/mean_terminated_length": 715.2037109375, "completions/min_length": 29.0, "completions/min_terminated_length": 176.4, "epoch": 0.23999700003749952, "grad_norm": 0.00013115812907926738, "learning_rate": 3.2530120481927713e-06, "loss": -0.0021, "num_tokens": 210404976.0, "reward": 1.2067232370376586, "reward_std": 0.032975076138973235, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9963070631027222, "rewards/confidence_uniqueness_reward": 0.45519496202468873, "rewards/format_reward": 0.9979166746139526, "rewards/frontier_aurc_reward": -0.002177492156624794, "rewards/frontier_coverage_0": 0.9418468713760376, "rewards/frontier_coverage_1": 0.9418468713760376, "rewards/frontier_coverage_10": 0.9418468713760376, "rewards/frontier_coverage_15": 0.9418468713760376, "rewards/frontier_coverage_20": 0.9418468713760376, "rewards/frontier_coverage_25": 0.9418468713760376, "rewards/frontier_coverage_5": 0.9418468713760376, "rewards/frontier_ece_reward": 0.0026272932533174752, "rewards/frontier_entropy_batch_reward": -0.9691365361213684, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.020837346091866493, "signal/advantage_pre_scale_abs_mean": 0.020837346091866493, "signal/advantage_pre_scale_std": 0.05691418126225471, "signal/advantage_std": 0.05691418126225471, "signal/brier_reward/centered_abs_mean": 0.005596226127818227, "signal/brier_reward/group_bin_occupancy": 0.69375, "signal/brier_reward/group_std_mean": 0.011906285118311644, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005596226139459759, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0005596226139459759, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2057848244905472, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9458333333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.24700284898281097, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020578482002019883, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020578482002019883, "signal/format_reward/centered_abs_mean": 0.003927951387595385, "signal/format_reward/group_bin_occupancy": 0.1305555555555556, "signal/format_reward/group_std_mean": 0.009288318641483783, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019639756937976927, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019639756937976927, "signal/frontier_aurc_reward/centered_abs_mean": 3.2242565794149414e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8621527777777777, "signal/frontier_aurc_reward/group_std_mean": 5.1109886408085e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0303208947989333e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0303208947989333e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_0/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_0/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_1/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_1/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_1/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_10/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_10/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_10/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_15/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_15/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_15/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_20/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_20/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_20/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_25/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_25/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_25/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_5/centered_abs_mean": 0.04101281464099884, "signal/frontier_coverage_5/group_bin_occupancy": 0.8722222222222221, "signal/frontier_coverage_5/group_std_mean": 0.05649868324398995, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041012815199792385, "signal/frontier_ece_reward/centered_abs_mean": 0.0019168400205671788, "signal/frontier_ece_reward/group_bin_occupancy": 0.5527777777777778, "signal/frontier_ece_reward/group_std_mean": 0.003455847967416048, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00019168400613125413, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00019168400613125413, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05724479258060455, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.22361111111111112, "signal/frontier_entropy_batch_reward/group_std_mean": 0.13107622563838958, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.43888888955116273, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005724479351192713, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005724479351192713, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 1.0, "eval_calibration/batch_distribution_entropy": 0.020131112472651442, "eval_calibration/batch_entropy_100bins": 0.3400135574714165, "eval_calibration/batch_entropy_10bins": 0.020131112472651442, "eval_calibration/batch_entropy_50bins": 0.250985950605582, "eval_calibration/batch_uniqueness": 0.2688802083333333, "eval_calibration/buffer_distribution_entropy": 0.8427708260785672, "eval_calibration/buffer_entropy_100bins": 0.8483835963339752, "eval_calibration/buffer_entropy_10bins": 0.8427708260785672, "eval_calibration/buffer_entropy_50bins": 0.8634748665870949, "eval_calibration/confidence_entropy": 0.08676234349291741, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.02021172808892777, "eval_calibration/mean_confidence": 0.02021172808892777, "eval_calibration/prompt_uniqueness": 0.2688802083333333, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 1824.6666666666667, "eval_completions/max_terminated_length": 1824.6666666666667, "eval_completions/mean_length": 717.3307495117188, "eval_completions/mean_terminated_length": 717.3307495117188, "eval_completions/min_length": 241.0, "eval_completions/min_terminated_length": 241.0, "eval_loss": 0.0, "eval_num_tokens": 210404976.0, "eval_reward": 1.1970368425051372, "eval_reward_std": 0.022655950548748176, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.9990857243537903, "eval_rewards/confidence_uniqueness_reward": 0.2510850702722867, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0022718874970450997, "eval_rewards/frontier_coverage_0": 0.9599077602227529, "eval_rewards/frontier_coverage_1": 0.9599077602227529, "eval_rewards/frontier_coverage_10": 0.9599077602227529, "eval_rewards/frontier_coverage_15": 0.9599077602227529, "eval_rewards/frontier_coverage_20": 0.9599077602227529, "eval_rewards/frontier_coverage_25": 0.9599077602227529, "eval_rewards/frontier_coverage_5": 0.9599077602227529, "eval_rewards/frontier_ece_reward": 0.0011265517581099023, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 101.635, "eval_samples_per_second": 9.839, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_bin_occupancy": 0.125, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.01836820226162672, "eval_signal/advantage_pre_scale_abs_mean": 0.01836820226162672, "eval_signal/advantage_pre_scale_std": 0.022806229380269844, "eval_signal/advantage_std": 0.022806229380269844, "eval_signal/brier_reward/centered_abs_mean": 0.0011060868758553017, "eval_signal/brier_reward/group_bin_occupancy": 0.6493055555555555, "eval_signal/brier_reward/group_std_mean": 0.0018368690895537536, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00011060868928325363, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.00011060868928325363, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.2825656458735466, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8611111111111112, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.34159958362579346, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028256564401090145, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028256564401090145, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 1.9667225690985408e-05, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8472222222222223, "eval_signal/frontier_aurc_reward/group_std_mean": 2.66848749864342e-05, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.45840328242745e-07, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.45840328242745e-07, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_0/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_0/weight": 0.10000000149011612, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_1/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_1/weight": 0.10000000149011612, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_10/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_10/weight": 0.10000000149011612, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_15/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_15/weight": 0.10000000149011612, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_20/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_20/weight": 0.10000000149011612, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_25/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_25/weight": 0.10000000149011612, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.03097957745194435, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.84375, "eval_signal/frontier_coverage_5/group_std_mean": 0.041869492580493294, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_coverage_5/weight": 0.10000000149011612, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030979577374334135, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0009274356222401062, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.4930555555555555, "eval_signal/frontier_ece_reward/group_std_mean": 0.0017319663796418656, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.274356186021275e-05, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 9.274356186021275e-05, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.059, "step": 100 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.012341952991260587, "calibration/batch_entropy_100bins": 0.37614392423453313, "calibration/batch_entropy_10bins": 0.012341952991260587, "calibration/batch_entropy_50bins": 0.27599843753881276, "calibration/batch_uniqueness": 0.33745739403507496, "calibration/buffer_distribution_entropy": 0.8390653652277222, "calibration/buffer_entropy_100bins": 0.8455699458385212, "calibration/buffer_entropy_10bins": 0.8390653652277222, "calibration/buffer_entropy_50bins": 0.8593265890928249, "calibration/confidence_entropy": 0.08470434211228765, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.01988270931589781, "calibration/mean_confidence": 0.019882709315897806, "calibration/prompt_uniqueness": 0.26092733805931323, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0007812500000000222, "completions/max_length": 2558.6, "completions/max_terminated_length": 2558.6, "completions/mean_length": 702.9444580078125, "completions/mean_terminated_length": 703.4955688476563, "completions/min_length": 0.0, "completions/min_terminated_length": 164.8, "epoch": 0.2519968500393745, "grad_norm": 9.564626816427335e-05, "learning_rate": 3.1024096385542172e-06, "loss": -0.001, "num_tokens": 221579760.0, "reward": 1.2081443309783935, "reward_std": 0.023845688626170158, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9981184363365173, "rewards/confidence_uniqueness_reward": 0.3303511440753937, "rewards/format_reward": 0.9990451335906982, "rewards/frontier_aurc_reward": -0.0024016556330025197, "rewards/frontier_coverage_0": 0.9605079174041748, "rewards/frontier_coverage_1": 0.9605079174041748, "rewards/frontier_coverage_10": 0.9605079174041748, "rewards/frontier_coverage_15": 0.9605079174041748, "rewards/frontier_coverage_20": 0.9605079174041748, "rewards/frontier_coverage_25": 0.9605079174041748, "rewards/frontier_coverage_5": 0.9605079174041748, "rewards/frontier_ece_reward": 0.0008044078014791012, "rewards/frontier_entropy_batch_reward": -0.9663121461868286, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.014495623297989368, "signal/advantage_pre_scale_abs_mean": 0.014495623297989368, "signal/advantage_pre_scale_std": 0.04096686318516731, "signal/advantage_std": 0.04096686318516731, "signal/brier_reward/centered_abs_mean": 0.00292236702516675, "signal/brier_reward/group_bin_occupancy": 0.6513888888888889, "signal/brier_reward/group_std_mean": 0.006811278499662876, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0002922367071732879, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0002922367071732879, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.25255054533481597, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8784722222222221, "signal/confidence_uniqueness_reward/group_std_mean": 0.3084090232849121, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025255054607987402, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025255054607987402, "signal/format_reward/centered_abs_mean": 0.0018391926656477152, "signal/format_reward/group_bin_occupancy": 0.12847222222222224, "signal/format_reward/group_std_mean": 0.005102569004520774, "signal/format_reward/group_zero_std_frac": 0.9722222089767456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009195963328238576, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009195963328238576, "signal/frontier_aurc_reward/centered_abs_mean": 2.5563345479895362e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8350694444444444, "signal/frontier_aurc_reward/group_std_mean": 4.01370954932645e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1954182304616553e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1954182304616553e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_0/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_0/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_1/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_1/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_1/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_10/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_10/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_10/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_15/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_15/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_15/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_20/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_20/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_20/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_25/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_25/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_25/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_5/centered_abs_mean": 0.03394378535449505, "signal/frontier_coverage_5/group_bin_occupancy": 0.8385416666666667, "signal/frontier_coverage_5/group_std_mean": 0.046593782305717465, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003394378535449505, "signal/frontier_ece_reward/centered_abs_mean": 0.0007472379831597209, "signal/frontier_ece_reward/group_bin_occupancy": 0.49756944444444445, "signal/frontier_ece_reward/group_std_mean": 0.001472054049372673, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 7.472379875252954e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 7.472379875252954e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0619256779551506, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2510416666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1331087276339531, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3027777820825577, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0061925679445266725, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0061925679445266725, "step": 105 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.021284326771816582, "calibration/batch_entropy_100bins": 0.3951721251871505, "calibration/batch_entropy_10bins": 0.021284326771816582, "calibration/batch_entropy_50bins": 0.30880453250559664, "calibration/batch_uniqueness": 0.4211642795138889, "calibration/buffer_distribution_entropy": 0.829256019029493, "calibration/buffer_entropy_100bins": 0.8388420472420786, "calibration/buffer_entropy_10bins": 0.829256019029493, "calibration/buffer_entropy_50bins": 0.8499276619128068, "calibration/confidence_entropy": 0.08871296935789795, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.022094368099568412, "calibration/mean_confidence": 0.022094368099568412, "calibration/prompt_uniqueness": 0.3543619791666667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 2544.4, "completions/max_terminated_length": 2544.4, "completions/mean_length": 696.4470581054687, "completions/mean_terminated_length": 696.752587890625, "completions/min_length": 112.6, "completions/min_terminated_length": 184.8, "epoch": 0.2639967000412495, "grad_norm": 0.00010304038733011112, "learning_rate": 2.9518072289156627e-06, "loss": -0.0007, "num_tokens": 232711278.0, "reward": 1.2140116214752197, "reward_std": 0.020644556544721127, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9981519222259522, "rewards/confidence_uniqueness_reward": 0.42851542234420775, "rewards/format_reward": 0.9993923664093017, "rewards/frontier_aurc_reward": -0.002601869171485305, "rewards/frontier_coverage_0": 0.9562180042266846, "rewards/frontier_coverage_1": 0.9562180042266846, "rewards/frontier_coverage_10": 0.9562180042266846, "rewards/frontier_coverage_15": 0.9562180042266846, "rewards/frontier_coverage_20": 0.9562180042266846, "rewards/frontier_coverage_25": 0.9562180042266846, "rewards/frontier_coverage_5": 0.9562180042266846, "rewards/frontier_ece_reward": 0.0006137272284831852, "rewards/frontier_entropy_batch_reward": -0.9773278951644897, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.012931106984615326, "signal/advantage_pre_scale_abs_mean": 0.012931106984615326, "signal/advantage_pre_scale_std": 0.03138108551502228, "signal/advantage_std": 0.03138108551502228, "signal/brier_reward/centered_abs_mean": 0.0026462335139513017, "signal/brier_reward/group_bin_occupancy": 0.6840277777777778, "signal/brier_reward/group_std_mean": 0.005694417608901858, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00026462336245458573, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00026462336245458573, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2552150249481201, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7388888888888888, "signal/confidence_uniqueness_reward/group_std_mean": 0.2904451459646225, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025521503388881685, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025521503388881685, "signal/format_reward/centered_abs_mean": 0.0011773003148846327, "signal/format_reward/group_bin_occupancy": 0.12743055555555557, "signal/format_reward/group_std_mean": 0.0034373244270682335, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501574423164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005886501574423164, "signal/frontier_aurc_reward/centered_abs_mean": 2.938565012300387e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8614583333333332, "signal/frontier_aurc_reward/group_std_mean": 4.244408264639787e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6732062653754837e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6732062653754837e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_0/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_0/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_1/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_1/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_1/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_10/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_10/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_10/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_15/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_15/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_15/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_20/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_20/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_20/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_25/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_25/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_25/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_5/centered_abs_mean": 0.04040106013417244, "signal/frontier_coverage_5/group_bin_occupancy": 0.8690972222222223, "signal/frontier_coverage_5/group_std_mean": 0.05285699814558029, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004040106106549502, "signal/frontier_ece_reward/centered_abs_mean": 0.0007259678619448096, "signal/frontier_ece_reward/group_bin_occupancy": 0.471875, "signal/frontier_ece_reward/group_std_mean": 0.001656959392130375, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 7.259679259732365e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 7.259679259732365e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.042409443855285646, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.21388888888888888, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10218364298343659, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.46111112236976626, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0042409445624798535, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0042409445624798535, "step": 110 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.01673821934029086, "calibration/batch_entropy_100bins": 0.38926544017604686, "calibration/batch_entropy_10bins": 0.01673821934029086, "calibration/batch_entropy_50bins": 0.2993580485835123, "calibration/batch_uniqueness": 0.4159125434027778, "calibration/buffer_distribution_entropy": 0.8175787124013256, "calibration/buffer_entropy_100bins": 0.8315024676718213, "calibration/buffer_entropy_10bins": 0.8175787124013256, "calibration/buffer_entropy_50bins": 0.8402204734165609, "calibration/confidence_entropy": 0.08572288929480461, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.021108248916521234, "calibration/mean_confidence": 0.02110824891652123, "calibration/prompt_uniqueness": 0.3557942708333333, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00017361111111111605, "completions/max_length": 2377.6, "completions/max_terminated_length": 2377.6, "completions/mean_length": 671.6215209960938, "completions/mean_terminated_length": 671.7394165039062, "completions/min_length": 89.6, "completions/min_terminated_length": 164.0, "epoch": 0.27599655004312446, "grad_norm": 5.828886787639931e-05, "learning_rate": 2.8012048192771087e-06, "loss": 0.0, "num_tokens": 243527558.0, "reward": 1.2144116401672362, "reward_std": 0.01996447965502739, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9981747031211853, "rewards/confidence_uniqueness_reward": 0.4300437688827515, "rewards/format_reward": 0.9993923544883728, "rewards/frontier_aurc_reward": -0.002783898450434208, "rewards/frontier_coverage_0": 0.9569729804992676, "rewards/frontier_coverage_1": 0.9569729804992676, "rewards/frontier_coverage_10": 0.9569729804992676, "rewards/frontier_coverage_15": 0.9569729804992676, "rewards/frontier_coverage_20": 0.9569729804992676, "rewards/frontier_coverage_25": 0.9569729804992676, "rewards/frontier_coverage_5": 0.9569729804992676, "rewards/frontier_ece_reward": 0.0003723478992469609, "rewards/frontier_entropy_batch_reward": -0.9798990249633789, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.012076887860894203, "signal/advantage_pre_scale_abs_mean": 0.012076887860894203, "signal/advantage_pre_scale_std": 0.03225056380033493, "signal/advantage_std": 0.03225056380033493, "signal/brier_reward/centered_abs_mean": 0.00263253313023597, "signal/brier_reward/group_bin_occupancy": 0.6972222222222222, "signal/brier_reward/group_std_mean": 0.005726341786794364, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0002632533200085163, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0002632533200085163, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2511452704668045, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7500000000000001, "signal/confidence_uniqueness_reward/group_std_mean": 0.2867334961891174, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025114526599645616, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025114526599645616, "signal/format_reward/centered_abs_mean": 0.001177300326526165, "signal/format_reward/group_bin_occupancy": 0.12743055555555555, "signal/format_reward/group_std_mean": 0.0034373244270682335, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501632630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005886501632630825, "signal/frontier_aurc_reward/centered_abs_mean": 2.965504681924358e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8607638888888888, "signal/frontier_aurc_reward/group_std_mean": 4.3591349822236225e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.706880761455977e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.706880761455977e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_0/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_0/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_1/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_1/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_1/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_10/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_10/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_10/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_15/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_15/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_15/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_20/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_20/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_20/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_25/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_25/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_25/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_5/centered_abs_mean": 0.03983094990253448, "signal/frontier_coverage_5/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_5/group_std_mean": 0.052179882675409316, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003983095102012157, "signal/frontier_ece_reward/centered_abs_mean": 0.0004976392199750989, "signal/frontier_ece_reward/group_bin_occupancy": 0.478125, "signal/frontier_ece_reward/group_std_mean": 0.0012285706703551113, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 4.9763925198931246e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 4.9763925198931246e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03792983740568161, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.19652777777777777, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09351965636014939, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5333333432674408, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.003792983898892999, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.003792983898892999, "step": 115 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.015287037913899185, "calibration/batch_entropy_100bins": 0.3848550389267389, "calibration/batch_entropy_10bins": 0.015287037913899185, "calibration/batch_entropy_50bins": 0.2882901972717423, "calibration/batch_uniqueness": 0.425244140625, "calibration/buffer_distribution_entropy": 0.8046537012281669, "calibration/buffer_entropy_100bins": 0.8234345031545167, "calibration/buffer_entropy_10bins": 0.8046537012281669, "calibration/buffer_entropy_50bins": 0.8296096064776775, "calibration/confidence_entropy": 0.08368989254666424, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.020141531457395022, "calibration/mean_confidence": 0.020141531457395025, "calibration/prompt_uniqueness": 0.3623046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 2541.8, "completions/max_terminated_length": 2541.8, "completions/mean_length": 682.83447265625, "completions/mean_terminated_length": 683.363427734375, "completions/min_length": 59.2, "completions/min_terminated_length": 159.4, "epoch": 0.28799640004499943, "grad_norm": 5.349236380425282e-05, "learning_rate": 2.6506024096385547e-06, "loss": -0.001, "num_tokens": 254475667.0, "reward": 1.217322826385498, "reward_std": 0.018410124257206915, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9980918765068054, "rewards/confidence_uniqueness_reward": 0.43044655919075014, "rewards/format_reward": 0.9991319298744201, "rewards/frontier_aurc_reward": -0.002950493525713682, "rewards/frontier_coverage_0": 0.959316098690033, "rewards/frontier_coverage_1": 0.959316098690033, "rewards/frontier_coverage_10": 0.959316098690033, "rewards/frontier_coverage_15": 0.959316098690033, "rewards/frontier_coverage_20": 0.959316098690033, "rewards/frontier_coverage_25": 0.959316098690033, "rewards/frontier_coverage_5": 0.959316098690033, "rewards/frontier_ece_reward": 0.0002875441190553829, "rewards/frontier_entropy_batch_reward": -0.9661013126373291, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.01139154490083456, "signal/advantage_pre_scale_abs_mean": 0.01139154490083456, "signal/advantage_pre_scale_std": 0.03482802901417017, "signal/advantage_std": 0.03482802901417017, "signal/brier_reward/centered_abs_mean": 0.00280342239420861, "signal/brier_reward/group_bin_occupancy": 0.6600694444444445, "signal/brier_reward/group_std_mean": 0.005472193798050284, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00028034225688315927, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00028034225688315927, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21315207481384277, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8375, "signal/confidence_uniqueness_reward/group_std_mean": 0.2526503801345825, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021315207704901695, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021315207704901695, "signal/format_reward/centered_abs_mean": 0.0015733506879769266, "signal/format_reward/group_bin_occupancy": 0.12708333333333333, "signal/format_reward/group_std_mean": 0.0034799596294760706, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007866753439884633, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007866753439884633, "signal/frontier_aurc_reward/centered_abs_mean": 2.8699574977508745e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.829861111111111, "signal/frontier_aurc_reward/group_std_mean": 4.1820811020443216e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.587446883557277e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.587446883557277e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_0/group_bin_occupancy": 0.85, "signal/frontier_coverage_0/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_1/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_1/group_bin_occupancy": 0.85, "signal/frontier_coverage_1/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_10/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_10/group_bin_occupancy": 0.85, "signal/frontier_coverage_10/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_15/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_20/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_20/group_bin_occupancy": 0.85, "signal/frontier_coverage_20/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_25/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_25/group_bin_occupancy": 0.85, "signal/frontier_coverage_25/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_5/centered_abs_mean": 0.0366835243999958, "signal/frontier_coverage_5/group_bin_occupancy": 0.85, "signal/frontier_coverage_5/group_std_mean": 0.04874376505613327, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036683525424450634, "signal/frontier_ece_reward/centered_abs_mean": 0.00045878663659095766, "signal/frontier_ece_reward/group_bin_occupancy": 0.4222222222222222, "signal/frontier_ece_reward/group_std_mean": 0.0012177627184428274, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 4.587866424117237e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 4.587866424117237e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06285881474614144, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25069444444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.14192153960466386, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3166666716337204, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00628588180989027, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00628588180989027, "step": 120 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.022472576476576352, "calibration/batch_entropy_100bins": 0.38102577658556147, "calibration/batch_entropy_10bins": 0.022472576476576352, "calibration/batch_entropy_50bins": 0.28351199674802774, "calibration/batch_uniqueness": 0.4171188269475567, "calibration/buffer_distribution_entropy": 0.791119751195128, "calibration/buffer_entropy_100bins": 0.8150867335752293, "calibration/buffer_entropy_10bins": 0.791119751195128, "calibration/buffer_entropy_50bins": 0.8186152341744503, "calibration/confidence_entropy": 0.08195105191568414, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.019687573368622573, "calibration/mean_confidence": 0.01968757336862257, "calibration/prompt_uniqueness": 0.34676777180406215, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0007812500000000222, "completions/max_length": 2195.6, "completions/max_terminated_length": 2195.6, "completions/mean_length": 685.1104248046875, "completions/mean_terminated_length": 685.647119140625, "completions/min_length": 42.6, "completions/min_terminated_length": 190.8, "epoch": 0.2999962500468744, "grad_norm": 7.856736920075491e-05, "learning_rate": 2.5e-06, "loss": -0.001, "num_tokens": 265485803.0, "reward": 1.2181265592575072, "reward_std": 0.01852573864161968, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9982476234436035, "rewards/confidence_uniqueness_reward": 0.42563745379447937, "rewards/format_reward": 0.999218738079071, "rewards/frontier_aurc_reward": -0.003106745798140764, "rewards/frontier_coverage_0": 0.960704791545868, "rewards/frontier_coverage_1": 0.960704791545868, "rewards/frontier_coverage_10": 0.960704791545868, "rewards/frontier_coverage_15": 0.960704791545868, "rewards/frontier_coverage_20": 0.960704791545868, "rewards/frontier_coverage_25": 0.960704791545868, "rewards/frontier_coverage_5": 0.960704791545868, "rewards/frontier_ece_reward": 0.00022829854860901834, "rewards/frontier_entropy_batch_reward": -0.9634865045547485, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.011229231022298336, "signal/advantage_pre_scale_abs_mean": 0.011229231022298336, "signal/advantage_pre_scale_std": 0.03441160153597593, "signal/advantage_std": 0.03441160153597593, "signal/brier_reward/centered_abs_mean": 0.0026414696127176284, "signal/brier_reward/group_bin_occupancy": 0.640625, "signal/brier_reward/group_std_mean": 0.0057011493248865005, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00026414696621941404, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00026414696621941404, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19697971045970916, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8625, "signal/confidence_uniqueness_reward/group_std_mean": 0.23735004365444184, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019697971269488336, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019697971269488336, "signal/format_reward/centered_abs_mean": 0.0014919704641215502, "signal/format_reward/group_bin_occupancy": 0.12743055555555555, "signal/format_reward/group_std_mean": 0.003821535501629114, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007459852320607751, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007459852320607751, "signal/frontier_aurc_reward/centered_abs_mean": 2.7659153784043155e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8177083333333333, "signal/frontier_aurc_reward/group_std_mean": 4.221230701659806e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4573943707982835e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4573943707982835e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_0/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_0/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_1/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_1/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_1/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_10/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_10/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_10/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_15/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_15/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_15/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_20/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_20/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_20/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_25/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_25/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_25/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_5/centered_abs_mean": 0.03492500931024552, "signal/frontier_coverage_5/group_bin_occupancy": 0.835763888888889, "signal/frontier_coverage_5/group_std_mean": 0.04719159454107284, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034925009589642287, "signal/frontier_ece_reward/centered_abs_mean": 0.00042187916114926337, "signal/frontier_ece_reward/group_bin_occupancy": 0.45069444444444445, "signal/frontier_ece_reward/group_std_mean": 0.0011129227350465953, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 4.2187915096292275e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 4.2187915096292275e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06738204509019852, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.253125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.14837366938591004, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3000000059604645, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006738204788416624, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.006738204788416624, "step": 125 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.029921545629725488, "calibration/batch_entropy_100bins": 0.3938974837071955, "calibration/batch_entropy_10bins": 0.029921545629725488, "calibration/batch_entropy_50bins": 0.2948700326136235, "calibration/batch_uniqueness": 0.4545985713361545, "calibration/buffer_distribution_entropy": 0.7775809153332081, "calibration/buffer_entropy_100bins": 0.8068847106686515, "calibration/buffer_entropy_10bins": 0.7775809153332081, "calibration/buffer_entropy_50bins": 0.8077963576382909, "calibration/confidence_entropy": 0.08758419464326617, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.021199454071341377, "calibration/mean_confidence": 0.02119945407134138, "calibration/prompt_uniqueness": 0.4083991149627124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 2295.8, "completions/max_terminated_length": 2295.8, "completions/mean_length": 692.5526977539063, "completions/mean_terminated_length": 692.8485107421875, "completions/min_length": 28.6, "completions/min_terminated_length": 171.6, "epoch": 0.3119961000487494, "grad_norm": 4.43594872194808e-05, "learning_rate": 2.349397590361446e-06, "loss": -0.0005, "num_tokens": 276588810.0, "reward": 1.219011116027832, "reward_std": 0.016876889020204545, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9985480904579163, "rewards/confidence_uniqueness_reward": 0.42221215963363645, "rewards/format_reward": 0.9995659589767456, "rewards/frontier_aurc_reward": -0.003253296110779047, "rewards/frontier_coverage_0": 0.9612909436225892, "rewards/frontier_coverage_1": 0.9612909436225892, "rewards/frontier_coverage_10": 0.9612909436225892, "rewards/frontier_coverage_15": 0.9612909436225892, "rewards/frontier_coverage_20": 0.9612909436225892, "rewards/frontier_coverage_25": 0.9612909436225892, "rewards/frontier_coverage_5": 0.9612909436225892, "rewards/frontier_ece_reward": 0.0002832911792211235, "rewards/frontier_entropy_batch_reward": -0.9573925971984864, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010039843246340751, "signal/advantage_pre_scale_abs_mean": 0.010039843246340751, "signal/advantage_pre_scale_std": 0.028154394030570982, "signal/advantage_std": 0.028154394030570982, "signal/brier_reward/centered_abs_mean": 0.002099990099668503, "signal/brier_reward/group_bin_occupancy": 0.6159722222222221, "signal/brier_reward/group_std_mean": 0.00470021041110158, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0002099990117130801, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0002099990117130801, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1907490074634552, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8645833333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.23286633491516112, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019074901193380355, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019074901193380355, "signal/format_reward/centered_abs_mean": 0.0008409288129769266, "signal/format_reward/group_bin_occupancy": 0.1267361111111111, "signal/format_reward/group_std_mean": 0.0024552317336201668, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644064884633, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004204644064884633, "signal/frontier_aurc_reward/centered_abs_mean": 2.587656126706861e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8006944444444445, "signal/frontier_aurc_reward/group_std_mean": 3.971766345784999e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2345701015401576e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2345701015401576e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_0/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_0/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_1/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_1/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_1/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_10/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_10/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_10/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_15/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_15/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_15/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_20/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_20/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_20/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_25/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_25/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_25/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_5/centered_abs_mean": 0.034396450221538546, "signal/frontier_coverage_5/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_5/group_std_mean": 0.04737475067377091, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034396452363580467, "signal/frontier_ece_reward/centered_abs_mean": 0.0005845244158990681, "signal/frontier_ece_reward/group_bin_occupancy": 0.45833333333333337, "signal/frontier_ece_reward/group_std_mean": 0.001524832844734192, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 5.845244086231105e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 5.845244086231105e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07861145734786987, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2697916666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1693776398897171, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24999999701976777, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.007861145678907633, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.007861145678907633, "step": 130 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.033724537578294043, "calibration/batch_entropy_100bins": 0.3790440291598166, "calibration/batch_entropy_10bins": 0.033724537578294043, "calibration/batch_entropy_50bins": 0.27996729733610637, "calibration/batch_uniqueness": 0.42015942923298233, "calibration/buffer_distribution_entropy": 0.7640241372764862, "calibration/buffer_entropy_100bins": 0.7985068573608476, "calibration/buffer_entropy_10bins": 0.7640241372764862, "calibration/buffer_entropy_50bins": 0.796776148875944, "calibration/confidence_entropy": 0.08183363627321727, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.019873024668607783, "calibration/mean_confidence": 0.019873024668607787, "calibration/prompt_uniqueness": 0.3658195672910164, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013020833333333482, "completions/max_length": 3082.2, "completions/max_terminated_length": 3082.2, "completions/mean_length": 669.673876953125, "completions/mean_terminated_length": 670.5369018554687, "completions/min_length": 0.0, "completions/min_terminated_length": 172.4, "epoch": 0.32399595005062437, "grad_norm": 5.57123712496832e-05, "learning_rate": 2.1987951807228917e-06, "loss": -0.0017, "num_tokens": 287396477.0, "reward": 1.2182500600814818, "reward_std": 0.019175108522176743, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9975671648979187, "rewards/confidence_uniqueness_reward": 0.43408032059669494, "rewards/format_reward": 0.9986979246139527, "rewards/frontier_aurc_reward": -0.003387561673298478, "rewards/frontier_coverage_0": 0.9586145401000976, "rewards/frontier_coverage_1": 0.9586145401000976, "rewards/frontier_coverage_10": 0.9586145401000976, "rewards/frontier_coverage_15": 0.9586145401000976, "rewards/frontier_coverage_20": 0.9586145401000976, "rewards/frontier_coverage_25": 0.9586145401000976, "rewards/frontier_coverage_5": 0.9586145401000976, "rewards/frontier_ece_reward": 0.00035531093017198144, "rewards/frontier_entropy_batch_reward": -0.9528710126876831, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.011726399697363377, "signal/advantage_pre_scale_abs_mean": 0.011726399697363377, "signal/advantage_pre_scale_std": 0.04185779392719269, "signal/advantage_std": 0.04185779392719269, "signal/brier_reward/centered_abs_mean": 0.003680155472829938, "signal/brier_reward/group_bin_occupancy": 0.6277777777777778, "signal/brier_reward/group_std_mean": 0.006919549405574798, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003680155467009172, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0003680155467009172, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20299766063690186, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8690972222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.24341756701469422, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020299766212701797, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020299766212701797, "signal/format_reward/centered_abs_mean": 0.002273220452480018, "signal/format_reward/group_bin_occupancy": 0.12743055555555555, "signal/format_reward/group_std_mean": 0.004497193172574044, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001136610226240009, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001136610226240009, "signal/frontier_aurc_reward/centered_abs_mean": 3.237565179006197e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8003472222222221, "signal/frontier_aurc_reward/group_std_mean": 4.882146386080422e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0469563487022244e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0469563487022244e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_0/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_0/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_1/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_1/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_1/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_10/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_10/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_10/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_15/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_15/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_15/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_20/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_20/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_20/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_25/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_25/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_25/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_5/centered_abs_mean": 0.03778303116559982, "signal/frontier_coverage_5/group_bin_occupancy": 0.8256944444444443, "signal/frontier_coverage_5/group_std_mean": 0.05185385718941689, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003778303088620305, "signal/frontier_ece_reward/centered_abs_mean": 0.0007637530216015875, "signal/frontier_ece_reward/group_bin_occupancy": 0.44340277777777776, "signal/frontier_ece_reward/group_std_mean": 0.001957472856156528, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 7.637530070496723e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 7.637530070496723e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08594719767570495, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2774305555555555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.17783950865268708, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24166666865348815, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.008594720251858234, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.008594720251858234, "step": 135 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.030543712678111984, "calibration/batch_entropy_100bins": 0.3845846721133629, "calibration/batch_entropy_10bins": 0.030543712678111984, "calibration/batch_entropy_50bins": 0.2859978978931486, "calibration/batch_uniqueness": 0.4320583767361111, "calibration/buffer_distribution_entropy": 0.7508008699338516, "calibration/buffer_entropy_100bins": 0.7925527781072645, "calibration/buffer_entropy_10bins": 0.7508008699338516, "calibration/buffer_entropy_50bins": 0.7866199361280733, "calibration/confidence_entropy": 0.08360223614543279, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.020143632359686154, "calibration/mean_confidence": 0.020143632359686154, "calibration/prompt_uniqueness": 0.3770182291666667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 3079.4, "completions/max_terminated_length": 3079.4, "completions/mean_length": 677.6828979492187, "completions/mean_terminated_length": 677.9762817382813, "completions/min_length": 33.4, "completions/min_terminated_length": 176.2, "epoch": 0.33599580005249935, "grad_norm": 5.097528628539294e-05, "learning_rate": 2.0481927710843377e-06, "loss": -0.0005, "num_tokens": 298307608.0, "reward": 1.219898509979248, "reward_std": 0.016702464036643504, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9983464241027832, "rewards/confidence_uniqueness_reward": 0.4385930418968201, "rewards/format_reward": 0.9995659589767456, "rewards/frontier_aurc_reward": -0.0035497402306646107, "rewards/frontier_coverage_0": 0.958567988872528, "rewards/frontier_coverage_1": 0.958567988872528, "rewards/frontier_coverage_10": 0.958567988872528, "rewards/frontier_coverage_15": 0.958567988872528, "rewards/frontier_coverage_20": 0.958567988872528, "rewards/frontier_coverage_25": 0.958567988872528, "rewards/frontier_coverage_5": 0.958567988872528, "rewards/frontier_ece_reward": 0.00036459730472415686, "rewards/frontier_entropy_batch_reward": -0.9456805467605591, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.009702853672206403, "signal/advantage_pre_scale_abs_mean": 0.009702853672206403, "signal/advantage_pre_scale_std": 0.027841240912675858, "signal/advantage_std": 0.027841240912675858, "signal/brier_reward/centered_abs_mean": 0.0023789591854438187, "signal/brier_reward/group_bin_occupancy": 0.6232638888888888, "signal/brier_reward/group_std_mean": 0.005185263650491834, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00023789591796230525, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00023789591796230525, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20213373899459838, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8822916666666668, "signal/confidence_uniqueness_reward/group_std_mean": 0.2441387802362442, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020213373750448228, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020213373750448228, "signal/format_reward/centered_abs_mean": 0.0008409288129769266, "signal/format_reward/group_bin_occupancy": 0.1267361111111111, "signal/format_reward/group_std_mean": 0.0024552317336201668, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644064884633, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004204644064884633, "signal/frontier_aurc_reward/centered_abs_mean": 2.9672855816897936e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7934027777777779, "signal/frontier_aurc_reward/group_std_mean": 4.578574880724773e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.70910709079908e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.70910709079908e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_0/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_0/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_1/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_1/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_1/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_10/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_10/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_10/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_15/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_15/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_15/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_20/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_20/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_20/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_25/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_25/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_25/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_5/centered_abs_mean": 0.03771033436059952, "signal/frontier_coverage_5/group_bin_occupancy": 0.8166666666666667, "signal/frontier_coverage_5/group_std_mean": 0.052084099501371384, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037710336968302728, "signal/frontier_ece_reward/centered_abs_mean": 0.0008136974531225861, "signal/frontier_ece_reward/group_bin_occupancy": 0.4142361111111111, "signal/frontier_ece_reward/group_std_mean": 0.0020070787984877827, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.136974647641182e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 8.136974647641182e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09818044751882553, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3, "signal/frontier_entropy_batch_reward/group_std_mean": 0.19889387488365173, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1805555522441864, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009818044863641262, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009818044863641262, "step": 140 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.0669231489987575, "calibration/batch_entropy_100bins": 0.4233465784923978, "calibration/batch_entropy_10bins": 0.0669231489987575, "calibration/batch_entropy_50bins": 0.3287001157455989, "calibration/batch_uniqueness": 0.5034230651863096, "calibration/buffer_distribution_entropy": 0.7353438977404082, "calibration/buffer_entropy_100bins": 0.787261287203169, "calibration/buffer_entropy_10bins": 0.7353438977404082, "calibration/buffer_entropy_50bins": 0.7757620948845135, "calibration/confidence_entropy": 0.0983193265772078, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.02480179368577078, "calibration/mean_confidence": 0.024801793685770783, "calibration/prompt_uniqueness": 0.4471254579647936, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009548611111111161, "completions/max_length": 2415.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 661.5493041992188, "completions/mean_terminated_length": 662.1683471679687, "completions/min_length": 0.0, "completions/min_terminated_length": 162.4, "epoch": 0.34799565005437433, "grad_norm": 5.204364060773514e-05, "learning_rate": 1.8975903614457832e-06, "loss": -0.0013, "num_tokens": 308993264.0, "reward": 1.2135377645492553, "reward_std": 0.01914830394089222, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9976012587547303, "rewards/confidence_uniqueness_reward": 0.46597900390625, "rewards/format_reward": 0.9990451335906982, "rewards/frontier_aurc_reward": -0.003718587104231119, "rewards/frontier_coverage_0": 0.9543892621994019, "rewards/frontier_coverage_1": 0.9543892621994019, "rewards/frontier_coverage_10": 0.9543892621994019, "rewards/frontier_coverage_15": 0.9543892621994019, "rewards/frontier_coverage_20": 0.9543892621994019, "rewards/frontier_coverage_25": 0.892327880859375, "rewards/frontier_coverage_5": 0.9543892621994019, "rewards/frontier_ece_reward": 0.0004319649888202548, "rewards/frontier_entropy_batch_reward": -0.942059063911438, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.011962970346212387, "signal/advantage_pre_scale_abs_mean": 0.011962970346212387, "signal/advantage_pre_scale_std": 0.036182846128940585, "signal/advantage_std": 0.036182846128940585, "signal/brier_reward/centered_abs_mean": 0.003550727292895317, "signal/brier_reward/group_bin_occupancy": 0.6284722222222222, "signal/brier_reward/group_std_mean": 0.007100052293390035, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00035507273860275746, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00035507273860275746, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20121129155158995, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8690972222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.23962823748588563, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020121129229664803, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020121129229664803, "signal/format_reward/centered_abs_mean": 0.0017415364738553763, "signal/format_reward/group_bin_occupancy": 0.12743055555555557, "signal/format_reward/group_std_mean": 0.003971005976200104, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008707682369276881, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008707682369276881, "signal/frontier_aurc_reward/centered_abs_mean": 3.7338528636610134e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7989583333333334, "signal/frontier_aurc_reward/group_std_mean": 5.7591882068663836e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6673160909449506e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6673160909449506e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.041554590314626695, "signal/frontier_coverage_0/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_0/group_std_mean": 0.05725188627839088, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_1/centered_abs_mean": 0.041554590314626695, "signal/frontier_coverage_1/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_1/group_std_mean": 0.05725188627839088, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_10/centered_abs_mean": 0.041554590314626695, "signal/frontier_coverage_10/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_10/group_std_mean": 0.05725188627839088, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_15/centered_abs_mean": 0.041554590314626695, "signal/frontier_coverage_15/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_15/group_std_mean": 0.05725188627839088, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_20/centered_abs_mean": 0.041554590314626695, "signal/frontier_coverage_20/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_20/group_std_mean": 0.05725188627839088, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_25/centered_abs_mean": 0.04010423347353935, "signal/frontier_coverage_25/group_bin_occupancy": 0.8260416666666666, "signal/frontier_coverage_25/group_std_mean": 0.055193550139665606, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00401042359881103, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00401042359881103, "signal/frontier_coverage_5/centered_abs_mean": 0.041554590314626695, "signal/frontier_coverage_5/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_5/group_std_mean": 0.05725188627839088, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00415545916184783, "signal/frontier_ece_reward/centered_abs_mean": 0.0009842410683631897, "signal/frontier_ece_reward/group_bin_occupancy": 0.4177083333333334, "signal/frontier_ece_reward/group_std_mean": 0.0023657533805817367, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.842410509008914e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 9.842410509008914e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10397121906280518, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.31354166666666666, "signal/frontier_entropy_batch_reward/group_std_mean": 0.20375558137893676, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.205555559694767, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010397122614085675, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010397122614085675, "step": 145 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.057605848650572555, "calibration/batch_entropy_100bins": 0.39059723252365697, "calibration/batch_entropy_10bins": 0.057605848650572555, "calibration/batch_entropy_50bins": 0.29644429432289326, "calibration/batch_uniqueness": 0.4236975737013129, "calibration/buffer_distribution_entropy": 0.7161117607960759, "calibration/buffer_entropy_100bins": 0.7796128759731576, "calibration/buffer_entropy_10bins": 0.7161117607960759, "calibration/buffer_entropy_50bins": 0.762393356954156, "calibration/confidence_entropy": 0.08499628039622698, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.021287106376922514, "calibration/mean_confidence": 0.021287106376922514, "calibration/prompt_uniqueness": 0.3726409393426986, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 2865.8, "completions/max_terminated_length": 2865.8, "completions/mean_length": 712.83681640625, "completions/mean_terminated_length": 713.329345703125, "completions/min_length": 31.6, "completions/min_terminated_length": 173.0, "epoch": 0.3599955000562493, "grad_norm": 5.70491720282007e-05, "learning_rate": 1.7469879518072292e-06, "loss": -0.0008, "num_tokens": 320315480.0, "reward": 1.1840367317199707, "reward_std": 0.01757926493883133, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9977462530136109, "rewards/confidence_uniqueness_reward": 0.4628437697887421, "rewards/format_reward": 0.9993055582046508, "rewards/frontier_aurc_reward": -0.0038903028704226016, "rewards/frontier_coverage_0": 0.9538887143135071, "rewards/frontier_coverage_1": 0.9538887143135071, "rewards/frontier_coverage_10": 0.9538887143135071, "rewards/frontier_coverage_15": 0.9538887143135071, "rewards/frontier_coverage_20": 0.8945078253746033, "rewards/frontier_coverage_25": 0.6542877435684205, "rewards/frontier_coverage_5": 0.9538887143135071, "rewards/frontier_ece_reward": 0.0004184367600828409, "rewards/frontier_entropy_batch_reward": -0.9349219322204589, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.01071018297225237, "signal/advantage_pre_scale_abs_mean": 0.01071018297225237, "signal/advantage_pre_scale_std": 0.031720586493611334, "signal/advantage_std": 0.031720586493611334, "signal/brier_reward/centered_abs_mean": 0.0033094821963459255, "signal/brier_reward/group_bin_occupancy": 0.6034722222222222, "signal/brier_reward/group_std_mean": 0.006449029874056577, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003309482126496732, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0003309482126496732, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21715166866779329, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8694444444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.2559134304523468, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02171516865491867, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02171516865491867, "signal/format_reward/centered_abs_mean": 0.0013020833255723118, "signal/format_reward/group_bin_occupancy": 0.1267361111111111, "signal/format_reward/group_std_mean": 0.00297891478985548, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006510416627861559, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006510416627861559, "signal/frontier_aurc_reward/centered_abs_mean": 4.027687973575667e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7857638888888889, "signal/frontier_aurc_reward/group_std_mean": 6.0778418992413207e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.034610012444318e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.034610012444318e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.043310850858688354, "signal/frontier_coverage_0/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_0/group_std_mean": 0.059559579193592074, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_1/centered_abs_mean": 0.043310850858688354, "signal/frontier_coverage_1/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_1/group_std_mean": 0.059559579193592074, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_10/centered_abs_mean": 0.043310850858688354, "signal/frontier_coverage_10/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_10/group_std_mean": 0.059559579193592074, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_15/centered_abs_mean": 0.043310850858688354, "signal/frontier_coverage_15/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_15/group_std_mean": 0.059559579193592074, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_20/centered_abs_mean": 0.04183773845434189, "signal/frontier_coverage_20/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_20/group_std_mean": 0.057459451258182526, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004183773742988705, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004183773742988705, "signal/frontier_coverage_25/centered_abs_mean": 0.03539830669760704, "signal/frontier_coverage_25/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_25/group_std_mean": 0.048354345560073855, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035398307256400583, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035398307256400583, "signal/frontier_coverage_5/centered_abs_mean": 0.043310850858688354, "signal/frontier_coverage_5/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_5/group_std_mean": 0.059559579193592074, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004331085272133351, "signal/frontier_ece_reward/centered_abs_mean": 0.0009813750861212612, "signal/frontier_ece_reward/group_bin_occupancy": 0.4104166666666667, "signal/frontier_ece_reward/group_std_mean": 0.0022871824447065594, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.813751239562407e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 9.813751239562407e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11633445173501969, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3201388888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.22344749867916108, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.138888893276453, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011633445136249065, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011633445136249065, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 1.0, "eval_calibration/batch_distribution_entropy": 0.07963192987356056, "eval_calibration/batch_entropy_100bins": 0.38521584880230836, "eval_calibration/batch_entropy_10bins": 0.07963192987356056, "eval_calibration/batch_entropy_50bins": 0.3103724394449236, "eval_calibration/batch_uniqueness": 0.4830729166666667, "eval_calibration/buffer_distribution_entropy": 0.7010500110603589, "eval_calibration/buffer_entropy_100bins": 0.7731452309964988, "eval_calibration/buffer_entropy_10bins": 0.7010500110603589, "eval_calibration/buffer_entropy_50bins": 0.7519987833032866, "eval_calibration/confidence_entropy": 0.10205378013778547, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.02598170129704982, "eval_calibration/mean_confidence": 0.02598170129704982, "eval_calibration/prompt_uniqueness": 0.4830729166666667, "eval_completions/clipped_ratio": 0.0017361111111111234, "eval_completions/max_length": 1706.8333333333333, "eval_completions/max_terminated_length": 1706.8333333333333, "eval_completions/mean_length": 674.4022725423177, "eval_completions/mean_terminated_length": 675.5978190104166, "eval_completions/min_length": 171.66666666666666, "eval_completions/min_terminated_length": 246.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 320315480.0, "eval_reward": 1.152519702911377, "eval_reward_std": 0.036670129746198654, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.9963855147361755, "eval_rewards/confidence_uniqueness_reward": 0.46905451516310376, "eval_rewards/format_reward": 0.9982638955116272, "eval_rewards/frontier_aurc_reward": -0.004004960569242637, "eval_rewards/frontier_coverage_0": 0.9477129876613617, "eval_rewards/frontier_coverage_1": 0.9477129876613617, "eval_rewards/frontier_coverage_10": 0.9477129876613617, "eval_rewards/frontier_coverage_15": 0.9477129876613617, "eval_rewards/frontier_coverage_20": 0.8091484904289246, "eval_rewards/frontier_coverage_25": 0.5189993580182394, "eval_rewards/frontier_coverage_5": 0.9477129876613617, "eval_rewards/frontier_ece_reward": 0.0004888492646083856, "eval_rewards/frontier_entropy_batch_reward": -0.9982638955116272, "eval_runtime": 126.377, "eval_samples_per_second": 7.913, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_bin_occupancy": 0.125, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.021798545494675636, "eval_signal/advantage_pre_scale_abs_mean": 0.021798545494675636, "eval_signal/advantage_pre_scale_std": 0.04753624647855759, "eval_signal/advantage_std": 0.04753624647855759, "eval_signal/brier_reward/centered_abs_mean": 0.005646458788154026, "eval_signal/brier_reward/group_bin_occupancy": 0.5868055555555556, "eval_signal/brier_reward/group_std_mean": 0.013697403056236604, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005646459176205099, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0005646459176205099, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.21048006663719812, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7777777777777778, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.24474786967039108, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02104800660163164, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02104800660163164, "eval_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/group_bin_occupancy": 0.13194444444444445, "eval_signal/format_reward/group_std_mean": 0.009820927555362383, "eval_signal/format_reward/group_zero_std_frac": 0.944444457689921, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/frontier_aurc_reward/centered_abs_mean": 5.219127585102493e-05, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7465277777777777, "eval_signal/frontier_aurc_reward/group_std_mean": 9.327459156338591e-05, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.523909803490824e-07, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.523909803490824e-07, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.0491910595446825, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8020833333333335, "eval_signal/frontier_coverage_0/group_std_mean": 0.06921165560682614, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_0/weight": 0.10000000149011612, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.0491910595446825, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8020833333333335, "eval_signal/frontier_coverage_1/group_std_mean": 0.06921165560682614, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_1/weight": 0.10000000149011612, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0491910595446825, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8020833333333335, "eval_signal/frontier_coverage_10/group_std_mean": 0.06921165560682614, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_10/weight": 0.10000000149011612, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.0491910595446825, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8020833333333335, "eval_signal/frontier_coverage_15/group_std_mean": 0.06921165560682614, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_15/weight": 0.10000000149011612, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.04529993608593941, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.7986111111111112, "eval_signal/frontier_coverage_20/group_std_mean": 0.06353887729346752, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004529993748292327, "eval_signal/frontier_coverage_20/weight": 0.10000000149011612, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004529993748292327, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.03559759445488453, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8055555555555557, "eval_signal/frontier_coverage_25/group_std_mean": 0.049026252080996834, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035597594687715173, "eval_signal/frontier_coverage_25/weight": 0.10000000149011612, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035597594687715173, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.0491910595446825, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8020833333333335, "eval_signal/frontier_coverage_5/group_std_mean": 0.06921165560682614, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_coverage_5/weight": 0.10000000149011612, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004919106063122551, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0011601041769608855, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.4166666666666666, "eval_signal/frontier_ece_reward/group_std_mean": 0.002786213590297848, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00011601041721102472, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00011601041721102472, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0033637151742974916, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.13194444444444445, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.009820927555362383, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.944444457689921, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003363715174297492, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003363715174297492, "eval_steps_per_second": 0.047, "step": 150 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.07433150140530483, "calibration/batch_entropy_100bins": 0.40955894700259393, "calibration/batch_entropy_10bins": 0.07433150140530483, "calibration/batch_entropy_50bins": 0.31573255203630846, "calibration/batch_uniqueness": 0.4638189879413958, "calibration/buffer_distribution_entropy": 0.6885061820464987, "calibration/buffer_entropy_100bins": 0.7678003824579658, "calibration/buffer_entropy_10bins": 0.6885061820464987, "calibration/buffer_entropy_50bins": 0.7438445488804817, "calibration/confidence_entropy": 0.09220933377109393, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.02361125712010372, "calibration/mean_confidence": 0.023611257120103722, "calibration/prompt_uniqueness": 0.41207164032691634, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 2119.8, "completions/max_terminated_length": 2119.8, "completions/mean_length": 668.18134765625, "completions/mean_terminated_length": 668.652392578125, "completions/min_length": 80.2, "completions/min_terminated_length": 179.8, "epoch": 0.3719953500581243, "grad_norm": 0.00011576049291761592, "learning_rate": 1.5963855421686747e-06, "loss": -0.0008, "num_tokens": 331120641.0, "reward": 1.145693302154541, "reward_std": 0.01810295507311821, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9974946975708008, "rewards/confidence_uniqueness_reward": 0.47134496569633483, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.004138502664864064, "rewards/frontier_coverage_0": 0.9522245168685913, "rewards/frontier_coverage_1": 0.9522245168685913, "rewards/frontier_coverage_10": 0.9522245168685913, "rewards/frontier_coverage_15": 0.9380970597267151, "rewards/frontier_coverage_20": 0.7361512064933777, "rewards/frontier_coverage_25": 0.44430738091468813, "rewards/frontier_coverage_5": 0.9522245168685913, "rewards/frontier_ece_reward": 0.0003640947339590639, "rewards/frontier_entropy_batch_reward": -0.9353014349937439, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010529661551117897, "signal/advantage_pre_scale_abs_mean": 0.010529661551117897, "signal/advantage_pre_scale_std": 0.03242862112820148, "signal/advantage_std": 0.03242862112820148, "signal/brier_reward/centered_abs_mean": 0.003718013083562255, "signal/brier_reward/group_bin_occupancy": 0.6059027777777778, "signal/brier_reward/group_std_mean": 0.007659111265093088, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003718013147590682, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0003718013147590682, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2201870173215866, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8548611111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.2593624711036682, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02201870158314705, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02201870158314705, "signal/format_reward/centered_abs_mean": 0.001491970452480018, "signal/format_reward/group_bin_occupancy": 0.12743055555555555, "signal/format_reward/group_std_mean": 0.0038215355947613717, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000745985226240009, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000745985226240009, "signal/frontier_aurc_reward/centered_abs_mean": 4.6402324369410056e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7687499999999999, "signal/frontier_aurc_reward/group_std_mean": 7.237604731926694e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.800290864499402e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.800290864499402e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.045414050668478013, "signal/frontier_coverage_0/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_0/group_std_mean": 0.06280554011464119, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_1/centered_abs_mean": 0.045414050668478013, "signal/frontier_coverage_1/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_1/group_std_mean": 0.06280554011464119, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_10/centered_abs_mean": 0.045414050668478013, "signal/frontier_coverage_10/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_10/group_std_mean": 0.06280554011464119, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_15/centered_abs_mean": 0.04506048187613487, "signal/frontier_coverage_15/group_bin_occupancy": 0.8083333333333332, "signal/frontier_coverage_15/group_std_mean": 0.062302114069461824, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004506048187613488, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004506048187613488, "signal/frontier_coverage_20/centered_abs_mean": 0.03970897793769836, "signal/frontier_coverage_20/group_bin_occupancy": 0.8104166666666668, "signal/frontier_coverage_20/group_std_mean": 0.05463676452636719, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003970897663384676, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003970897663384676, "signal/frontier_coverage_25/centered_abs_mean": 0.030375007912516593, "signal/frontier_coverage_25/group_bin_occupancy": 0.8152777777777779, "signal/frontier_coverage_25/group_std_mean": 0.04137557670474053, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003037500847131014, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003037500847131014, "signal/frontier_coverage_5/centered_abs_mean": 0.045414050668478013, "signal/frontier_coverage_5/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_5/group_std_mean": 0.06280554011464119, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004541405383497477, "signal/frontier_ece_reward/centered_abs_mean": 0.0009263153071515263, "signal/frontier_ece_reward/group_bin_occupancy": 0.44548611111111114, "signal/frontier_ece_reward/group_std_mean": 0.0021566389128565787, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.263153333449736e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 9.263153333449736e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11519579887390137, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.33472222222222225, "signal/frontier_entropy_batch_reward/group_std_mean": 0.21977143883705139, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.14166666865348815, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011519579775631427, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011519579775631427, "step": 155 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.09049986613417273, "calibration/batch_entropy_100bins": 0.41836594311176223, "calibration/batch_entropy_10bins": 0.09049986613417273, "calibration/batch_entropy_50bins": 0.3264790681345277, "calibration/batch_uniqueness": 0.47830015613269194, "calibration/buffer_distribution_entropy": 0.6578983569779517, "calibration/buffer_entropy_100bins": 0.7546975009480232, "calibration/buffer_entropy_10bins": 0.6578983569779517, "calibration/buffer_entropy_50bins": 0.7244401883194689, "calibration/confidence_entropy": 0.09609417117910231, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.024954540242279003, "calibration/mean_confidence": 0.024954540242279007, "calibration/prompt_uniqueness": 0.4211417824074074, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0007812500000000222, "completions/max_length": 2455.2, "completions/max_terminated_length": 2455.2, "completions/mean_length": 671.3717895507813, "completions/mean_terminated_length": 671.8844848632813, "completions/min_length": 29.4, "completions/min_terminated_length": 172.6, "epoch": 0.38399520005999926, "grad_norm": 5.154682366992347e-05, "learning_rate": 1.4457831325301204e-06, "loss": -0.0009, "num_tokens": 341942140.0, "reward": 1.1252743005752563, "reward_std": 0.01854655463248491, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9972919225692749, "rewards/confidence_uniqueness_reward": 0.47839406728744505, "rewards/format_reward": 0.9991319298744201, "rewards/frontier_aurc_reward": -0.004414523858577013, "rewards/frontier_coverage_0": 0.9507062673568726, "rewards/frontier_coverage_1": 0.9507062673568726, "rewards/frontier_coverage_10": 0.9507062673568726, "rewards/frontier_coverage_15": 0.9317117929458618, "rewards/frontier_coverage_20": 0.6626289486885071, "rewards/frontier_coverage_25": 0.3123237192630768, "rewards/frontier_coverage_5": 0.9507062673568726, "rewards/frontier_ece_reward": 0.0003186647722031921, "rewards/frontier_entropy_batch_reward": -0.9278595924377442, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010827501863241195, "signal/advantage_pre_scale_abs_mean": 0.010827501863241195, "signal/advantage_pre_scale_std": 0.03373810928314924, "signal/advantage_std": 0.03373810928314924, "signal/brier_reward/centered_abs_mean": 0.004014838207513094, "signal/brier_reward/group_bin_occupancy": 0.5802083333333334, "signal/brier_reward/group_std_mean": 0.0078026833012700084, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00040148382540792225, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00040148382540792225, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21236040890216829, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8649305555555558, "signal/confidence_uniqueness_reward/group_std_mean": 0.25193352103233335, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021236040443181992, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021236040443181992, "signal/format_reward/centered_abs_mean": 0.0016059027751907707, "signal/format_reward/group_bin_occupancy": 0.12708333333333333, "signal/format_reward/group_std_mean": 0.0035807004664093257, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008029513875953854, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008029513875953854, "signal/frontier_aurc_reward/centered_abs_mean": 5.2095612045377496e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7486111111111111, "signal/frontier_aurc_reward/group_std_mean": 8.059373503783717e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.511951596621657e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.511951596621657e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.046503888070583345, "signal/frontier_coverage_0/group_bin_occupancy": 0.795138888888889, "signal/frontier_coverage_0/group_std_mean": 0.06489058881998062, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_1/centered_abs_mean": 0.046503888070583345, "signal/frontier_coverage_1/group_bin_occupancy": 0.795138888888889, "signal/frontier_coverage_1/group_std_mean": 0.06489058881998062, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_10/centered_abs_mean": 0.046503888070583345, "signal/frontier_coverage_10/group_bin_occupancy": 0.795138888888889, "signal/frontier_coverage_10/group_std_mean": 0.06489058881998062, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_15/centered_abs_mean": 0.04594070836901665, "signal/frontier_coverage_15/group_bin_occupancy": 0.795138888888889, "signal/frontier_coverage_15/group_std_mean": 0.06408572494983673, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004594070836901665, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004594070836901665, "signal/frontier_coverage_20/centered_abs_mean": 0.03836109861731529, "signal/frontier_coverage_20/group_bin_occupancy": 0.7961805555555557, "signal/frontier_coverage_20/group_std_mean": 0.05321277379989624, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038361097220331432, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038361097220331432, "signal/frontier_coverage_25/centered_abs_mean": 0.025570582970976828, "signal/frontier_coverage_25/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_25/group_std_mean": 0.03507376089692116, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002557058446109295, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002557058446109295, "signal/frontier_coverage_5/centered_abs_mean": 0.046503888070583345, "signal/frontier_coverage_5/group_bin_occupancy": 0.795138888888889, "signal/frontier_coverage_5/group_std_mean": 0.06489058881998062, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004650388844311237, "signal/frontier_ece_reward/centered_abs_mean": 0.0008886751136742532, "signal/frontier_ece_reward/group_bin_occupancy": 0.44722222222222224, "signal/frontier_ece_reward/group_std_mean": 0.002127536362968385, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.886751311365515e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 8.886751311365515e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12815287709236145, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3399305555555555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23999074399471282, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11388888880610466, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01281528789550066, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01281528789550066, "step": 160 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.11315508988168552, "calibration/batch_entropy_100bins": 0.4314929978888915, "calibration/batch_entropy_10bins": 0.11315508988168552, "calibration/batch_entropy_50bins": 0.3423038694920577, "calibration/batch_uniqueness": 0.48717437446107875, "calibration/buffer_distribution_entropy": 0.6237706258889, "calibration/buffer_entropy_100bins": 0.7391337749032101, "calibration/buffer_entropy_10bins": 0.6237706258889, "calibration/buffer_entropy_50bins": 0.7026557564520889, "calibration/confidence_entropy": 0.10202881321508014, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.02723046109240631, "calibration/mean_confidence": 0.027230461092406316, "calibration/prompt_uniqueness": 0.4402623542100243, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0003472222222222321, "completions/max_length": 2025.0, "completions/max_terminated_length": 2025.0, "completions/mean_length": 663.9360229492188, "completions/mean_terminated_length": 664.1629760742187, "completions/min_length": 66.6, "completions/min_terminated_length": 171.6, "epoch": 0.39599505006187424, "grad_norm": 7.190422184066847e-05, "learning_rate": 1.2951807228915664e-06, "loss": -0.0004, "num_tokens": 352729755.0, "reward": 1.1091915130615235, "reward_std": 0.017433946020901203, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9974352121353149, "rewards/confidence_uniqueness_reward": 0.5001117169857026, "rewards/format_reward": 0.9995659708976745, "rewards/frontier_aurc_reward": -0.004717729333788156, "rewards/frontier_coverage_0": 0.9473315596580505, "rewards/frontier_coverage_1": 0.9473315596580505, "rewards/frontier_coverage_10": 0.9473315596580505, "rewards/frontier_coverage_15": 0.9283288359642029, "rewards/frontier_coverage_20": 0.5928752660751343, "rewards/frontier_coverage_25": 0.20689672827720643, "rewards/frontier_coverage_5": 0.9473315596580505, "rewards/frontier_ece_reward": 0.00027219949988648294, "rewards/frontier_entropy_batch_reward": -0.9205714225769043, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010360554978251457, "signal/advantage_pre_scale_abs_mean": 0.010360554978251457, "signal/advantage_pre_scale_std": 0.0259440615773201, "signal/advantage_std": 0.0259440615773201, "signal/brier_reward/centered_abs_mean": 0.0036302336025983094, "signal/brier_reward/group_bin_occupancy": 0.5999999999999999, "signal/brier_reward/group_std_mean": 0.0071899168193340305, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00036302337539382277, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00036302337539382277, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21097120344638826, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8465277777777777, "signal/confidence_uniqueness_reward/group_std_mean": 0.24810958802700042, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021097120270133017, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021097120270133017, "signal/format_reward/centered_abs_mean": 0.0008409288013353944, "signal/format_reward/group_bin_occupancy": 0.1267361111111111, "signal/format_reward/group_std_mean": 0.0024552317336201668, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644006676972, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004204644006676972, "signal/frontier_aurc_reward/centered_abs_mean": 5.836021373397671e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7534722222222222, "signal/frontier_aurc_reward/group_std_mean": 8.929047762649134e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.295026534848148e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.295026534848148e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.0495891772210598, "signal/frontier_coverage_0/group_bin_occupancy": 0.8024305555555555, "signal/frontier_coverage_0/group_std_mean": 0.06838846206665039, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_1/centered_abs_mean": 0.0495891772210598, "signal/frontier_coverage_1/group_bin_occupancy": 0.8024305555555555, "signal/frontier_coverage_1/group_std_mean": 0.06838846206665039, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_10/centered_abs_mean": 0.0495891772210598, "signal/frontier_coverage_10/group_bin_occupancy": 0.8024305555555555, "signal/frontier_coverage_10/group_std_mean": 0.06838846206665039, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_15/centered_abs_mean": 0.04905526265501976, "signal/frontier_coverage_15/group_bin_occupancy": 0.8024305555555555, "signal/frontier_coverage_15/group_std_mean": 0.06764949411153794, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00490552643314004, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00490552643314004, "signal/frontier_coverage_20/centered_abs_mean": 0.03878045603632927, "signal/frontier_coverage_20/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_20/group_std_mean": 0.05315817221999168, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038780457805842163, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038780457805842163, "signal/frontier_coverage_25/centered_abs_mean": 0.021978146955370904, "signal/frontier_coverage_25/group_bin_occupancy": 0.820138888888889, "signal/frontier_coverage_25/group_std_mean": 0.029635490104556083, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002197814825922251, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002197814825922251, "signal/frontier_coverage_5/centered_abs_mean": 0.0495891772210598, "signal/frontier_coverage_5/group_bin_occupancy": 0.8024305555555555, "signal/frontier_coverage_5/group_std_mean": 0.06838846206665039, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00495891785249114, "signal/frontier_ece_reward/centered_abs_mean": 0.0008334407932125032, "signal/frontier_ece_reward/group_bin_occupancy": 0.4770833333333334, "signal/frontier_ece_reward/group_std_mean": 0.0019264052622020244, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.334408194059506e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 8.334408194059506e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1398734837770462, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3708333333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2520256280899048, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444626569748, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013987349346280098, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013987349346280098, "step": 165 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.12664223510339462, "calibration/batch_entropy_100bins": 0.4533190068324675, "calibration/batch_entropy_10bins": 0.12664223510339462, "calibration/batch_entropy_50bins": 0.36405689826353826, "calibration/batch_uniqueness": 0.5366184798988781, "calibration/buffer_distribution_entropy": 0.5884116370549787, "calibration/buffer_entropy_100bins": 0.7212360190144044, "calibration/buffer_entropy_10bins": 0.5884116370549787, "calibration/buffer_entropy_50bins": 0.6790585356261435, "calibration/confidence_entropy": 0.1112857625829545, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.03005219126552771, "calibration/mean_confidence": 0.03005219126552771, "calibration/prompt_uniqueness": 0.48039785879629626, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 2424.8, "completions/max_terminated_length": 2424.8, "completions/mean_length": 664.7514892578125, "completions/mean_terminated_length": 665.209765625, "completions/min_length": 58.2, "completions/min_terminated_length": 159.8, "epoch": 0.4079949000637492, "grad_norm": 5.9971396694891155e-05, "learning_rate": 1.1445783132530121e-06, "loss": -0.0008, "num_tokens": 363476876.0, "reward": 1.0961509227752686, "reward_std": 0.02001706659793854, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9968130946159363, "rewards/confidence_uniqueness_reward": 0.5170513212680816, "rewards/format_reward": 0.9991319417953491, "rewards/frontier_aurc_reward": -0.005049161147326231, "rewards/frontier_coverage_0": 0.9443602681159973, "rewards/frontier_coverage_1": 0.9443602681159973, "rewards/frontier_coverage_10": 0.9443602681159973, "rewards/frontier_coverage_15": 0.9223458051681519, "rewards/frontier_coverage_20": 0.5228747487068176, "rewards/frontier_coverage_25": 0.149213108420372, "rewards/frontier_coverage_5": 0.9443602681159973, "rewards/frontier_ece_reward": 0.0002292450924869627, "rewards/frontier_entropy_batch_reward": -0.9194879293441772, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.01188025027513504, "signal/advantage_pre_scale_abs_mean": 0.01188025027513504, "signal/advantage_pre_scale_std": 0.033895105868577954, "signal/advantage_std": 0.033895105868577954, "signal/brier_reward/centered_abs_mean": 0.004660056484863162, "signal/brier_reward/group_bin_occupancy": 0.603125, "signal/brier_reward/group_std_mean": 0.009344376530498267, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0004660056554712355, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0004660056554712355, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20060275495052338, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8604166666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.2357197880744934, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020060275867581367, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020060275867581367, "signal/format_reward/centered_abs_mean": 0.0016601562150754035, "signal/format_reward/group_bin_occupancy": 0.1277777777777778, "signal/format_reward/group_std_mean": 0.004312581941485405, "signal/format_reward/group_zero_std_frac": 0.9777777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008300781075377018, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008300781075377018, "signal/frontier_aurc_reward/centered_abs_mean": 7.052028959151358e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7475694444444444, "signal/frontier_aurc_reward/group_std_mean": 0.00011104991572210565, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.815036608211813e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.815036608211813e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.05183984935283661, "signal/frontier_coverage_0/group_bin_occupancy": 0.8131944444444444, "signal/frontier_coverage_0/group_std_mean": 0.0717699870467186, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_1/centered_abs_mean": 0.05183984935283661, "signal/frontier_coverage_1/group_bin_occupancy": 0.8131944444444444, "signal/frontier_coverage_1/group_std_mean": 0.0717699870467186, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_10/centered_abs_mean": 0.05183984935283661, "signal/frontier_coverage_10/group_bin_occupancy": 0.8131944444444444, "signal/frontier_coverage_10/group_std_mean": 0.0717699870467186, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_15/centered_abs_mean": 0.05118511840701103, "signal/frontier_coverage_15/group_bin_occupancy": 0.8131944444444444, "signal/frontier_coverage_15/group_std_mean": 0.07084002643823624, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0051185118034482, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0051185118034482, "signal/frontier_coverage_20/centered_abs_mean": 0.03787098824977875, "signal/frontier_coverage_20/group_bin_occupancy": 0.8190972222222221, "signal/frontier_coverage_20/group_std_mean": 0.05193596109747887, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037870988249778747, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037870988249778747, "signal/frontier_coverage_25/centered_abs_mean": 0.019114065170288085, "signal/frontier_coverage_25/group_bin_occupancy": 0.8402777777777777, "signal/frontier_coverage_25/group_std_mean": 0.025573540851473807, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019114065449684857, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019114065449684857, "signal/frontier_coverage_5/centered_abs_mean": 0.05183984935283661, "signal/frontier_coverage_5/group_bin_occupancy": 0.8131944444444444, "signal/frontier_coverage_5/group_std_mean": 0.0717699870467186, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0051839851774275305, "signal/frontier_ece_reward/centered_abs_mean": 0.0008435256313532591, "signal/frontier_ece_reward/group_bin_occupancy": 0.5055555555555555, "signal/frontier_ece_reward/group_std_mean": 0.001923717954196036, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.435256313532591e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 8.435256313532591e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14058307111263274, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37777777777777777, "signal/frontier_entropy_batch_reward/group_std_mean": 0.249654358625412, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000223517418, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014058307558298112, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014058307558298112, "step": 170 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.11065710270652036, "calibration/batch_entropy_100bins": 0.4242565693032717, "calibration/batch_entropy_10bins": 0.11065710270652036, "calibration/batch_entropy_50bins": 0.3340393237755341, "calibration/batch_uniqueness": 0.4780490451388889, "calibration/buffer_distribution_entropy": 0.5503343485727277, "calibration/buffer_entropy_100bins": 0.699339396111367, "calibration/buffer_entropy_10bins": 0.5503343485727277, "calibration/buffer_entropy_50bins": 0.6523164013443059, "calibration/confidence_entropy": 0.09850787980656697, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.02609543780615011, "calibration/mean_confidence": 0.02609543780615011, "calibration/prompt_uniqueness": 0.4217447916666666, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0008680555555555802, "completions/max_length": 2943.6, "completions/max_terminated_length": 2943.6, "completions/mean_length": 690.4206787109375, "completions/mean_terminated_length": 691.0146606445312, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.4199947500656242, "grad_norm": 6.962921179365367e-05, "learning_rate": 9.93975903614458e-07, "loss": -0.0011, "num_tokens": 374538490.0, "reward": 1.0829426765441894, "reward_std": 0.019469749182462692, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9969998836517334, "rewards/confidence_uniqueness_reward": 0.48848451375961305, "rewards/format_reward": 0.9991319417953491, "rewards/frontier_aurc_reward": -0.005386561527848244, "rewards/frontier_coverage_0": 0.9478386521339417, "rewards/frontier_coverage_1": 0.9478386521339417, "rewards/frontier_coverage_10": 0.9452585101127624, "rewards/frontier_coverage_15": 0.9100270628929138, "rewards/frontier_coverage_20": 0.45304937958717345, "rewards/frontier_coverage_25": 0.11471473574638366, "rewards/frontier_coverage_5": 0.9478386521339417, "rewards/frontier_ece_reward": 7.881744513724698e-05, "rewards/frontier_entropy_batch_reward": -0.9176892876625061, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.01158843245357275, "signal/advantage_pre_scale_abs_mean": 0.01158843245357275, "signal/advantage_pre_scale_std": 0.034636962413787845, "signal/advantage_std": 0.034636962413787845, "signal/brier_reward/centered_abs_mean": 0.00444792709313333, "signal/brier_reward/group_bin_occupancy": 0.60625, "signal/brier_reward/group_std_mean": 0.0084507011808455, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00044479272910393777, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00044479272910393777, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22068254351615907, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333332, "signal/confidence_uniqueness_reward/group_std_mean": 0.2589582115411758, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02206825464963913, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02206825464963913, "signal/format_reward/centered_abs_mean": 0.0016276041511446237, "signal/format_reward/group_bin_occupancy": 0.12708333333333333, "signal/format_reward/group_std_mean": 0.003662066673859954, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008138020755723118, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008138020755723118, "signal/frontier_aurc_reward/centered_abs_mean": 7.466641545761377e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74375, "signal/frontier_aurc_reward/group_std_mean": 0.00011583235755097121, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.333302159575397e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.333302159575397e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.05025492906570435, "signal/frontier_coverage_0/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_0/group_std_mean": 0.06967095285654068, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005025493167340756, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005025493167340756, "signal/frontier_coverage_1/centered_abs_mean": 0.05025492906570435, "signal/frontier_coverage_1/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_1/group_std_mean": 0.06967095285654068, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005025493167340756, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005025493167340756, "signal/frontier_coverage_10/centered_abs_mean": 0.050183454900979994, "signal/frontier_coverage_10/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_10/group_std_mean": 0.06957284063100815, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005018345545977354, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005018345545977354, "signal/frontier_coverage_15/centered_abs_mean": 0.04914246648550034, "signal/frontier_coverage_15/group_bin_occupancy": 0.8079861111111113, "signal/frontier_coverage_15/group_std_mean": 0.06813211217522622, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004914247151464224, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004914247151464224, "signal/frontier_coverage_20/centered_abs_mean": 0.033781594783067706, "signal/frontier_coverage_20/group_bin_occupancy": 0.814236111111111, "signal/frontier_coverage_20/group_std_mean": 0.04629666060209274, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033781595062464475, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033781595062464475, "signal/frontier_coverage_25/centered_abs_mean": 0.01581826526671648, "signal/frontier_coverage_25/group_bin_occupancy": 0.8413194444444445, "signal/frontier_coverage_25/group_std_mean": 0.021130923926830292, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00158182664308697, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00158182664308697, "signal/frontier_coverage_5/centered_abs_mean": 0.05025492906570435, "signal/frontier_coverage_5/group_bin_occupancy": 0.8079861111111111, "signal/frontier_coverage_5/group_std_mean": 0.06967095285654068, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005025493167340756, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005025493167340756, "signal/frontier_ece_reward/centered_abs_mean": 0.0006432678666897119, "signal/frontier_ece_reward/group_bin_occupancy": 0.5315972222222223, "signal/frontier_ece_reward/group_std_mean": 0.0014886658871546387, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 6.43267878331244e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 6.43267878331244e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14317719340324403, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37430555555555556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2517729789018631, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07222222350537777, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014317720010876656, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014317720010876656, "step": 175 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.12243381522418999, "calibration/batch_entropy_100bins": 0.43948610026723023, "calibration/batch_entropy_10bins": 0.12243381522418999, "calibration/batch_entropy_50bins": 0.3486525593485308, "calibration/batch_uniqueness": 0.5136855712749895, "calibration/buffer_distribution_entropy": 0.5074215023331706, "calibration/buffer_entropy_100bins": 0.6735423239876466, "calibration/buffer_entropy_10bins": 0.5074215023331706, "calibration/buffer_entropy_50bins": 0.6219486115552227, "calibration/confidence_entropy": 0.10537724605773757, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.028224827259382624, "calibration/mean_confidence": 0.028224827259382624, "calibration/prompt_uniqueness": 0.4567347923170309, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013020833333333482, "completions/max_length": 2687.4, "completions/max_terminated_length": 2687.4, "completions/mean_length": 673.0866333007813, "completions/mean_terminated_length": 673.9573852539063, "completions/min_length": 33.6, "completions/min_terminated_length": 171.0, "epoch": 0.4319946000674992, "grad_norm": 9.977127774618566e-05, "learning_rate": 8.433734939759036e-07, "loss": -0.0018, "num_tokens": 385392416.0, "reward": 1.0712470054626464, "reward_std": 0.021333076059818268, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9964566230773926, "rewards/confidence_uniqueness_reward": 0.494760000705719, "rewards/format_reward": 0.9986979246139527, "rewards/frontier_aurc_reward": -0.005718124844133854, "rewards/frontier_coverage_0": 0.9464028477668762, "rewards/frontier_coverage_1": 0.9464028477668762, "rewards/frontier_coverage_10": 0.941408348083496, "rewards/frontier_coverage_15": 0.8598459959030151, "rewards/frontier_coverage_20": 0.40984349250793456, "rewards/frontier_coverage_25": 0.090379236638546, "rewards/frontier_coverage_5": 0.9464028477668762, "rewards/frontier_ece_reward": -0.00010452797578182071, "rewards/frontier_entropy_batch_reward": -0.9121028780937195, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.012168660387396812, "signal/advantage_pre_scale_abs_mean": 0.012168660387396812, "signal/advantage_pre_scale_std": 0.03872519172728062, "signal/advantage_std": 0.03872519172728062, "signal/brier_reward/centered_abs_mean": 0.005366379115730524, "signal/brier_reward/group_bin_occupancy": 0.5815972222222221, "signal/brier_reward/group_std_mean": 0.010433847829699517, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005366379395127296, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0005366379395127296, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21540333926677704, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.851736111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.25403536260128023, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021540333330631257, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021540333330631257, "signal/format_reward/centered_abs_mean": 0.002392578055150807, "signal/format_reward/group_bin_occupancy": 0.128125, "signal/format_reward/group_std_mean": 0.005337309651076793, "signal/format_reward/group_zero_std_frac": 0.9749999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0011962890275754035, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0011962890275754035, "signal/frontier_aurc_reward/centered_abs_mean": 8.230793027905748e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7076388888888889, "signal/frontier_aurc_reward/group_std_mean": 0.00013148134166840464, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.0288491466781125e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.0288491466781125e-06, "signal/frontier_coverage_0/centered_abs_mean": 0.051775246113538745, "signal/frontier_coverage_0/group_bin_occupancy": 0.7840277777777779, "signal/frontier_coverage_0/group_std_mean": 0.07248672246932983, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005177524592727423, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005177524592727423, "signal/frontier_coverage_1/centered_abs_mean": 0.051775246113538745, "signal/frontier_coverage_1/group_bin_occupancy": 0.7840277777777779, "signal/frontier_coverage_1/group_std_mean": 0.07248672246932983, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005177524592727423, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005177524592727423, "signal/frontier_coverage_10/centered_abs_mean": 0.05163362696766853, "signal/frontier_coverage_10/group_bin_occupancy": 0.7840277777777779, "signal/frontier_coverage_10/group_std_mean": 0.0722737193107605, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005163362808525562, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005163362808525562, "signal/frontier_coverage_15/centered_abs_mean": 0.049173231422901156, "signal/frontier_coverage_15/group_bin_occupancy": 0.7840277777777779, "signal/frontier_coverage_15/group_std_mean": 0.06863442212343215, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004917323403060436, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004917323403060436, "signal/frontier_coverage_20/centered_abs_mean": 0.03243098296225071, "signal/frontier_coverage_20/group_bin_occupancy": 0.7975694444444444, "signal/frontier_coverage_20/group_std_mean": 0.04470113664865494, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032430985011160374, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032430985011160374, "signal/frontier_coverage_25/centered_abs_mean": 0.014044274762272834, "signal/frontier_coverage_25/group_bin_occupancy": 0.8270833333333334, "signal/frontier_coverage_25/group_std_mean": 0.018727122619748116, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014044275041669608, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014044275041669608, "signal/frontier_coverage_5/centered_abs_mean": 0.051775246113538745, "signal/frontier_coverage_5/group_bin_occupancy": 0.7840277777777779, "signal/frontier_coverage_5/group_std_mean": 0.07248672246932983, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005177524592727423, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005177524592727423, "signal/frontier_ece_reward/centered_abs_mean": 0.0005371888051740825, "signal/frontier_ece_reward/group_bin_occupancy": 0.5788194444444444, "signal/frontier_ece_reward/group_std_mean": 0.0011012869304977357, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 5.3718879644293335e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 5.3718879644293335e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15258081257343292, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.38159722222222225, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2634937405586243, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.050000001676380634, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015258081257343292, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015258081257343292, "step": 180 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.1397452059158112, "calibration/batch_entropy_100bins": 0.4532488626623232, "calibration/batch_entropy_10bins": 0.1397452059158112, "calibration/batch_entropy_50bins": 0.36456030835742337, "calibration/batch_uniqueness": 0.5317420231340483, "calibration/buffer_distribution_entropy": 0.46346997718826266, "calibration/buffer_entropy_100bins": 0.6473612509534107, "calibration/buffer_entropy_10bins": 0.46346997718826266, "calibration/buffer_entropy_50bins": 0.5911614288645997, "calibration/confidence_entropy": 0.11180096035805913, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.03057175988552433, "calibration/mean_confidence": 0.03057175988552433, "calibration/prompt_uniqueness": 0.4787019949921956, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00017361111111111605, "completions/max_length": 2614.6, "completions/max_terminated_length": 2614.6, "completions/mean_length": 650.3674438476562, "completions/mean_terminated_length": 650.4793334960938, "completions/min_length": 98.0, "completions/min_terminated_length": 166.6, "epoch": 0.44399445006937416, "grad_norm": 0.0001353075058432296, "learning_rate": 6.927710843373495e-07, "loss": -0.0003, "num_tokens": 395974697.0, "reward": 1.0496395826339722, "reward_std": 0.01763119362294674, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.997331178188324, "rewards/confidence_uniqueness_reward": 0.5169573307037354, "rewards/format_reward": 0.9997395753860474, "rewards/frontier_aurc_reward": -0.006073478236794471, "rewards/frontier_coverage_0": 0.9439624428749085, "rewards/frontier_coverage_1": 0.9439624428749085, "rewards/frontier_coverage_10": 0.9314130187034607, "rewards/frontier_coverage_15": 0.7630311965942382, "rewards/frontier_coverage_20": 0.31332806348800657, "rewards/frontier_coverage_25": 0.06819383800029755, "rewards/frontier_coverage_5": 0.9432536959648132, "rewards/frontier_ece_reward": -0.00026074662746395917, "rewards/frontier_entropy_batch_reward": -0.9227155327796936, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010541598871350288, "signal/advantage_pre_scale_abs_mean": 0.010541598871350288, "signal/advantage_pre_scale_std": 0.02300250492990017, "signal/advantage_std": 0.02300250492990017, "signal/brier_reward/centered_abs_mean": 0.0036982921417802574, "signal/brier_reward/group_bin_occupancy": 0.6208333333333333, "signal/brier_reward/group_std_mean": 0.006745168566703796, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00036982920719310643, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00036982920719310643, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20829733610153198, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8489583333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.24315473139286042, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020829734578728675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020829734578728675, "signal/format_reward/centered_abs_mean": 0.0005045572877861559, "signal/format_reward/group_bin_occupancy": 0.12604166666666666, "signal/format_reward/group_std_mean": 0.0014731390401721, "signal/format_reward/group_zero_std_frac": 0.9916666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00025227864389307795, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00025227864389307795, "signal/frontier_aurc_reward/centered_abs_mean": 7.683526928303763e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7350694444444444, "signal/frontier_aurc_reward/group_std_mean": 0.00011488014133647085, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.60440888775338e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.60440888775338e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.05254817381501198, "signal/frontier_coverage_0/group_bin_occupancy": 0.815625, "signal/frontier_coverage_0/group_std_mean": 0.0715567022562027, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005254817847162485, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005254817847162485, "signal/frontier_coverage_1/centered_abs_mean": 0.05254817381501198, "signal/frontier_coverage_1/group_bin_occupancy": 0.815625, "signal/frontier_coverage_1/group_std_mean": 0.0715567022562027, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005254817847162485, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005254817847162485, "signal/frontier_coverage_10/centered_abs_mean": 0.052186784148216245, "signal/frontier_coverage_10/group_bin_occupancy": 0.8152777777777779, "signal/frontier_coverage_10/group_std_mean": 0.07105031162500382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005218678433448076, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005218678433448076, "signal/frontier_coverage_15/centered_abs_mean": 0.04690430983901024, "signal/frontier_coverage_15/group_bin_occupancy": 0.8173611111111111, "signal/frontier_coverage_15/group_std_mean": 0.06369466707110405, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004690431244671345, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004690431244671345, "signal/frontier_coverage_20/centered_abs_mean": 0.02917616181075573, "signal/frontier_coverage_20/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_20/group_std_mean": 0.03923035711050034, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029176161624491215, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029176161624491215, "signal/frontier_coverage_25/centered_abs_mean": 0.012513933330774307, "signal/frontier_coverage_25/group_bin_occupancy": 0.8538194444444442, "signal/frontier_coverage_25/group_std_mean": 0.016343067213892937, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001251393323764205, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001251393323764205, "signal/frontier_coverage_5/centered_abs_mean": 0.05252725183963776, "signal/frontier_coverage_5/group_bin_occupancy": 0.815625, "signal/frontier_coverage_5/group_std_mean": 0.07152666449546814, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005252725258469581, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005252725258469581, "signal/frontier_ece_reward/centered_abs_mean": 0.0006273603008594364, "signal/frontier_ece_reward/group_bin_occupancy": 0.6163194444444444, "signal/frontier_ece_reward/group_std_mean": 0.0012211131630465387, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 6.273603576119058e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 6.273603576119058e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13514876663684844, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.36111111111111105, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24130152761936188, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10277777835726738, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013514876924455166, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013514876924455166, "step": 185 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.14159358832858687, "calibration/batch_entropy_100bins": 0.457122083997195, "calibration/batch_entropy_10bins": 0.14159358832858687, "calibration/batch_entropy_50bins": 0.36937610428144063, "calibration/batch_uniqueness": 0.5414725827467513, "calibration/buffer_distribution_entropy": 0.4170079353065802, "calibration/buffer_entropy_100bins": 0.6200426436493253, "calibration/buffer_entropy_10bins": 0.4170079353065802, "calibration/buffer_entropy_50bins": 0.5589740745093812, "calibration/confidence_entropy": 0.11398051065385324, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.03120483008441626, "calibration/mean_confidence": 0.031204830084416258, "calibration/prompt_uniqueness": 0.49466172931841834, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006076388888889061, "completions/max_length": 2246.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 668.8586181640625, "completions/mean_terminated_length": 669.2669555664063, "completions/min_length": 28.2, "completions/min_terminated_length": 172.8, "epoch": 0.45599430007124914, "grad_norm": 0.00011211562377866358, "learning_rate": 5.421686746987952e-07, "loss": -0.0006, "num_tokens": 406762892.0, "reward": 1.0342876434326171, "reward_std": 0.020416828989982604, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9967251300811768, "rewards/confidence_uniqueness_reward": 0.5348598599433899, "rewards/format_reward": 0.9993055462837219, "rewards/frontier_aurc_reward": -0.0064199940301477906, "rewards/frontier_coverage_0": 0.9401438474655152, "rewards/frontier_coverage_1": 0.9401438474655152, "rewards/frontier_coverage_10": 0.9241740465164184, "rewards/frontier_coverage_15": 0.6645206689834595, "rewards/frontier_coverage_20": 0.2714716076850891, "rewards/frontier_coverage_25": 0.05851527601480484, "rewards/frontier_coverage_5": 0.9390028834342956, "rewards/frontier_ece_reward": -0.0005113947670906782, "rewards/frontier_entropy_batch_reward": -0.9218945980072022, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.011686141788959502, "signal/advantage_pre_scale_abs_mean": 0.011686141788959502, "signal/advantage_pre_scale_std": 0.031211203336715697, "signal/advantage_std": 0.031211203336715697, "signal/brier_reward/centered_abs_mean": 0.004695464763790369, "signal/brier_reward/group_bin_occupancy": 0.6270833333333333, "signal/brier_reward/group_std_mean": 0.009423768334090709, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00046954649733379485, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00046954649733379485, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20274572670459748, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8649305555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.23646896481513976, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020274572446942328, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020274572446942328, "signal/format_reward/centered_abs_mean": 0.0013454860891215503, "signal/format_reward/group_bin_occupancy": 0.12777777777777777, "signal/format_reward/group_std_mean": 0.003928370773792267, "signal/format_reward/group_zero_std_frac": 0.9777777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006727430445607752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006727430445607752, "signal/frontier_aurc_reward/centered_abs_mean": 8.048932650126517e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7298611111111112, "signal/frontier_aurc_reward/group_std_mean": 0.00012802162964362652, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.006116576718341e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.006116576718341e-06, "signal/frontier_coverage_0/centered_abs_mean": 0.05514864847064018, "signal/frontier_coverage_0/group_bin_occupancy": 0.8149305555555557, "signal/frontier_coverage_0/group_std_mean": 0.0753313958644867, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0055148651823401455, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0055148651823401455, "signal/frontier_coverage_1/centered_abs_mean": 0.05514864847064018, "signal/frontier_coverage_1/group_bin_occupancy": 0.8149305555555557, "signal/frontier_coverage_1/group_std_mean": 0.0753313958644867, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0055148651823401455, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0055148651823401455, "signal/frontier_coverage_10/centered_abs_mean": 0.05464940145611763, "signal/frontier_coverage_10/group_bin_occupancy": 0.8149305555555557, "signal/frontier_coverage_10/group_std_mean": 0.07461710721254348, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005464939959347248, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005464939959347248, "signal/frontier_coverage_15/centered_abs_mean": 0.045646652579307556, "signal/frontier_coverage_15/group_bin_occupancy": 0.8190972222222224, "signal/frontier_coverage_15/group_std_mean": 0.06189981997013092, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004564665257930756, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004564665257930756, "signal/frontier_coverage_20/centered_abs_mean": 0.02797577567398548, "signal/frontier_coverage_20/group_bin_occupancy": 0.8340277777777778, "signal/frontier_coverage_20/group_std_mean": 0.03734948411583901, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002797577390447259, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002797577390447259, "signal/frontier_coverage_25/centered_abs_mean": 0.011748875863850117, "signal/frontier_coverage_25/group_bin_occupancy": 0.8729166666666668, "signal/frontier_coverage_25/group_std_mean": 0.015189211443066597, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011748875956982374, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011748875956982374, "signal/frontier_coverage_5/centered_abs_mean": 0.055112923681735995, "signal/frontier_coverage_5/group_bin_occupancy": 0.8149305555555557, "signal/frontier_coverage_5/group_std_mean": 0.07528131753206253, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0055112925358116625, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0055112925358116625, "signal/frontier_ece_reward/centered_abs_mean": 0.0008357930113561451, "signal/frontier_ece_reward/group_bin_occupancy": 0.6513888888888889, "signal/frontier_ece_reward/group_std_mean": 0.0014408526243641973, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.357930200872943e-05, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 8.357930200872943e-05, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13618087768554688, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37881944444444443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24314994513988494, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000298023224, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013618088141083718, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013618088141083718, "step": 190 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.13027754640944603, "calibration/batch_entropy_100bins": 0.4532988871809781, "calibration/batch_entropy_10bins": 0.13027754640944603, "calibration/batch_entropy_50bins": 0.36252827329939896, "calibration/batch_uniqueness": 0.5393975639469387, "calibration/buffer_distribution_entropy": 0.3707754570940318, "calibration/buffer_entropy_100bins": 0.5929112185718033, "calibration/buffer_entropy_10bins": 0.3707754570940318, "calibration/buffer_entropy_50bins": 0.5270096794218817, "calibration/confidence_entropy": 0.11147583357801931, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.02990175815167439, "calibration/mean_confidence": 0.02990175815167439, "calibration/prompt_uniqueness": 0.49622036778095735, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 2352.6, "completions/max_terminated_length": 2352.6, "completions/mean_length": 684.2117309570312, "completions/mean_terminated_length": 684.5085205078125, "completions/min_length": 31.6, "completions/min_terminated_length": 161.6, "epoch": 0.46799415007312406, "grad_norm": 0.00023885858536232263, "learning_rate": 3.91566265060241e-07, "loss": -0.0005, "num_tokens": 417725875.0, "reward": 1.0253089666366577, "reward_std": 0.019479617476463318, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9968544721603394, "rewards/confidence_uniqueness_reward": 0.5347052574157715, "rewards/format_reward": 0.9993923544883728, "rewards/frontier_aurc_reward": -0.006775370147079229, "rewards/frontier_coverage_0": 0.9401466131210328, "rewards/frontier_coverage_1": 0.9401466131210328, "rewards/frontier_coverage_10": 0.9198176503181458, "rewards/frontier_coverage_15": 0.6201009631156922, "rewards/frontier_coverage_20": 0.2388811856508255, "rewards/frontier_coverage_25": 0.05078308284282684, "rewards/frontier_coverage_5": 0.9393264293670655, "rewards/frontier_ece_reward": -0.0007388276979327201, "rewards/frontier_entropy_batch_reward": -0.9230483531951904, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.011419064365327358, "signal/advantage_pre_scale_abs_mean": 0.011419064365327358, "signal/advantage_pre_scale_std": 0.029631392285227775, "signal/advantage_std": 0.029631392285227775, "signal/brier_reward/centered_abs_mean": 0.004466315684840083, "signal/brier_reward/group_bin_occupancy": 0.6125, "signal/brier_reward/group_std_mean": 0.008843818213790655, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00044663152657449247, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00044663152657449247, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20187841057777406, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85, "signal/confidence_uniqueness_reward/group_std_mean": 0.2346838593482971, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02018784135580063, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02018784135580063, "signal/format_reward/centered_abs_mean": 0.001177300326526165, "signal/format_reward/group_bin_occupancy": 0.12743055555555557, "signal/format_reward/group_std_mean": 0.0034373244270682335, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501632630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005886501632630825, "signal/frontier_aurc_reward/centered_abs_mean": 7.506578403990716e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7173611111111111, "signal/frontier_aurc_reward/group_std_mean": 0.0001206688757520169, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.383222845826821e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.383222845826821e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.054283497482538225, "signal/frontier_coverage_0/group_bin_occupancy": 0.8135416666666666, "signal/frontier_coverage_0/group_std_mean": 0.07416067421436309, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005428350064903498, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005428350064903498, "signal/frontier_coverage_1/centered_abs_mean": 0.054283497482538225, "signal/frontier_coverage_1/group_bin_occupancy": 0.8135416666666666, "signal/frontier_coverage_1/group_std_mean": 0.07416067421436309, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005428350064903498, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005428350064903498, "signal/frontier_coverage_10/centered_abs_mean": 0.053661100566387177, "signal/frontier_coverage_10/group_bin_occupancy": 0.8135416666666666, "signal/frontier_coverage_10/group_std_mean": 0.07328308522701263, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00536611033603549, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00536611033603549, "signal/frontier_coverage_15/centered_abs_mean": 0.04330161884427071, "signal/frontier_coverage_15/group_bin_occupancy": 0.8215277777777779, "signal/frontier_coverage_15/group_std_mean": 0.05878105312585831, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004330162052065134, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004330162052065134, "signal/frontier_coverage_20/centered_abs_mean": 0.025675978884100913, "signal/frontier_coverage_20/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_20/group_std_mean": 0.03427637964487076, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025675980374217033, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025675980374217033, "signal/frontier_coverage_25/centered_abs_mean": 0.010694251023232937, "signal/frontier_coverage_25/group_bin_occupancy": 0.8760416666666666, "signal/frontier_coverage_25/group_std_mean": 0.013758376985788346, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010694251395761967, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010694251395761967, "signal/frontier_coverage_5/centered_abs_mean": 0.05425994023680687, "signal/frontier_coverage_5/group_bin_occupancy": 0.8135416666666666, "signal/frontier_coverage_5/group_std_mean": 0.07412810325622558, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005425994377583265, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005425994377583265, "signal/frontier_ece_reward/centered_abs_mean": 0.001065828336868435, "signal/frontier_ece_reward/group_bin_occupancy": 0.6572916666666667, "signal/frontier_ece_reward/group_std_mean": 0.0017601919127628208, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0001065828386344947, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0001065828386344947, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13488883078098296, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.371875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2434331715106964, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06388888973742723, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013488883711397648, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013488883711397648, "step": 195 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.13851863335516348, "calibration/batch_entropy_100bins": 0.4530965748328907, "calibration/batch_entropy_10bins": 0.13851863335516348, "calibration/batch_entropy_50bins": 0.36334488782311547, "calibration/batch_uniqueness": 0.5406433485539678, "calibration/buffer_distribution_entropy": 0.32474393930528545, "calibration/buffer_entropy_100bins": 0.5656790315407927, "calibration/buffer_entropy_10bins": 0.32474393930528545, "calibration/buffer_entropy_50bins": 0.49501215415121924, "calibration/confidence_entropy": 0.11259193466071331, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.03032343621645415, "calibration/mean_confidence": 0.030323436216454152, "calibration/prompt_uniqueness": 0.49584634061741245, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001041666666666674, "completions/max_length": 3055.6, "completions/max_terminated_length": 3055.6, "completions/mean_length": 653.0716186523438, "completions/mean_terminated_length": 653.7612670898437, "completions/min_length": 0.0, "completions/min_terminated_length": 177.2, "epoch": 0.47999400007499904, "grad_norm": 0.00013980362564325333, "learning_rate": 2.409638554216868e-07, "loss": -0.0011, "num_tokens": 428317068.0, "reward": 1.0170727252960206, "reward_std": 0.021385809779167174, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9963622570037842, "rewards/confidence_uniqueness_reward": 0.5438861727714539, "rewards/format_reward": 0.9989583253860473, "rewards/frontier_aurc_reward": -0.007121744379401207, "rewards/frontier_coverage_0": 0.9382775664329529, "rewards/frontier_coverage_1": 0.9382775664329529, "rewards/frontier_coverage_10": 0.910933256149292, "rewards/frontier_coverage_15": 0.5705180168151855, "rewards/frontier_coverage_20": 0.21805984079837798, "rewards/frontier_coverage_25": 0.05167670994997024, "rewards/frontier_coverage_5": 0.9366953253746033, "rewards/frontier_ece_reward": -0.0011651593260467053, "rewards/frontier_entropy_batch_reward": -0.9266958594322204, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.011997931264340877, "signal/advantage_pre_scale_abs_mean": 0.011997931264340877, "signal/advantage_pre_scale_std": 0.034786536172032353, "signal/advantage_std": 0.034786536172032353, "signal/brier_reward/centered_abs_mean": 0.005294179357588291, "signal/brier_reward/group_bin_occupancy": 0.6322916666666667, "signal/brier_reward/group_std_mean": 0.010655418131500482, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005294179252814501, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0005294179252814501, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1994739830493927, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8621527777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.23068396151065826, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019947398081421853, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019947398081421853, "signal/format_reward/centered_abs_mean": 0.0019965277635492383, "signal/format_reward/group_bin_occupancy": 0.1284722222222222, "signal/format_reward/group_std_mean": 0.005294674634933471, "signal/format_reward/group_zero_std_frac": 0.9722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009982638817746191, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009982638817746191, "signal/frontier_aurc_reward/centered_abs_mean": 6.543248455272987e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7041666666666666, "signal/frontier_aurc_reward/group_std_mean": 0.00011322678910801187, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.179060500879132e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.179060500879132e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.05585807859897614, "signal/frontier_coverage_0/group_bin_occupancy": 0.83125, "signal/frontier_coverage_0/group_std_mean": 0.07585428953170777, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005585807748138905, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005585807748138905, "signal/frontier_coverage_1/centered_abs_mean": 0.05585807859897614, "signal/frontier_coverage_1/group_bin_occupancy": 0.83125, "signal/frontier_coverage_1/group_std_mean": 0.07585428953170777, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005585807748138905, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005585807748138905, "signal/frontier_coverage_10/centered_abs_mean": 0.0549948088824749, "signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_10/group_std_mean": 0.07462709993124009, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005499481037259102, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005499481037259102, "signal/frontier_coverage_15/centered_abs_mean": 0.04226614907383919, "signal/frontier_coverage_15/group_bin_occupancy": 0.8364583333333332, "signal/frontier_coverage_15/group_std_mean": 0.056792113929986954, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004226614907383919, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004226614907383919, "signal/frontier_coverage_20/centered_abs_mean": 0.02481546886265278, "signal/frontier_coverage_20/group_bin_occupancy": 0.8503472222222221, "signal/frontier_coverage_20/group_std_mean": 0.03280436284840107, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024815469048917295, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024815469048917295, "signal/frontier_coverage_25/centered_abs_mean": 0.010957871191203594, "signal/frontier_coverage_25/group_bin_occupancy": 0.8864583333333333, "signal/frontier_coverage_25/group_std_mean": 0.014017502591013909, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010957871330901981, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010957871330901981, "signal/frontier_coverage_5/centered_abs_mean": 0.05581053644418717, "signal/frontier_coverage_5/group_bin_occupancy": 0.83125, "signal/frontier_coverage_5/group_std_mean": 0.07578854262828827, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005581053905189037, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005581053905189037, "signal/frontier_ece_reward/centered_abs_mean": 0.001521405391395092, "signal/frontier_ece_reward/group_bin_occupancy": 0.6770833333333334, "signal/frontier_ece_reward/group_std_mean": 0.0023106941487640144, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00015214053855743258, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00015214053855743258, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1287894919514656, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.35, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23831940293312073, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11111111268401146, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01287894994020462, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01287894994020462, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 1.0, "eval_calibration/batch_distribution_entropy": 0.06643064451573505, "eval_calibration/batch_entropy_100bins": 0.37397742139005613, "eval_calibration/batch_entropy_10bins": 0.06643064451573505, "eval_calibration/batch_entropy_50bins": 0.2916695775087659, "eval_calibration/batch_uniqueness": 0.4544270833333333, "eval_calibration/buffer_distribution_entropy": 0.2915210919064046, "eval_calibration/buffer_entropy_100bins": 0.5463082971911503, "eval_calibration/buffer_entropy_10bins": 0.2915210919064046, "eval_calibration/buffer_entropy_50bins": 0.4722571553551534, "eval_calibration/confidence_entropy": 0.09491457918095768, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.023790659409756474, "eval_calibration/mean_confidence": 0.023790659409756474, "eval_calibration/prompt_uniqueness": 0.4544270833333333, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 1680.1666666666667, "eval_completions/max_terminated_length": 1680.1666666666667, "eval_completions/mean_length": 669.5234578450521, "eval_completions/mean_terminated_length": 669.5234578450521, "eval_completions/min_length": 236.83333333333334, "eval_completions/min_terminated_length": 236.83333333333334, "eval_loss": 0.0, "eval_num_tokens": 428317068.0, "eval_reward": 0.9965925514698029, "eval_reward_std": 0.024295299003521603, "eval_rewards/accuracy_reward": 0.0, "eval_rewards/brier_reward": 0.9971688687801361, "eval_rewards/confidence_uniqueness_reward": 0.4712456613779068, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.007382758737852176, "eval_rewards/frontier_coverage_0": 0.9456672767798106, "eval_rewards/frontier_coverage_1": 0.9456672767798106, "eval_rewards/frontier_coverage_10": 0.8689454793930054, "eval_rewards/frontier_coverage_15": 0.5284307897090912, "eval_rewards/frontier_coverage_20": 0.20884332557519278, "eval_rewards/frontier_coverage_25": 0.058971162885427475, "eval_rewards/frontier_coverage_5": 0.9430893162886301, "eval_rewards/frontier_ece_reward": -0.0011813394764127831, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 94.7926, "eval_samples_per_second": 10.549, "eval_signal/accuracy_reward/centered_abs_mean": 0.0, "eval_signal/accuracy_reward/group_bin_occupancy": 0.125, "eval_signal/accuracy_reward/group_std_mean": 0.0, "eval_signal/accuracy_reward/group_zero_std_frac": 1.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "eval_signal/advantage_abs_mean": 0.016077665146440268, "eval_signal/advantage_pre_scale_abs_mean": 0.016077665146440268, "eval_signal/advantage_pre_scale_std": 0.02679586907227834, "eval_signal/advantage_std": 0.02679586907227834, "eval_signal/brier_reward/centered_abs_mean": 0.004176080265703301, "eval_signal/brier_reward/group_bin_occupancy": 0.5763888888888888, "eval_signal/brier_reward/group_std_mean": 0.009115726919844747, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00041760804257743683, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.00041760804257743683, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.22382948050896326, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7569444444444443, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.2574465771516164, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0223829485476017, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0223829485476017, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 9.999908312844734e-05, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6284722222222222, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0002476933683889608, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.2499885049995403e-06, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.2499885049995403e-06, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.050190938636660576, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8090277777777778, "eval_signal/frontier_coverage_0/group_std_mean": 0.07040310216446717, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005019093786055843, "eval_signal/frontier_coverage_0/weight": 0.10000000149011612, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005019093786055843, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.050190938636660576, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8090277777777778, "eval_signal/frontier_coverage_1/group_std_mean": 0.07040310216446717, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005019093786055843, "eval_signal/frontier_coverage_1/weight": 0.10000000149011612, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005019093786055843, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.048136645928025246, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8090277777777778, "eval_signal/frontier_coverage_10/group_std_mean": 0.06756559945642948, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004813664670412739, "eval_signal/frontier_coverage_10/weight": 0.10000000149011612, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004813664670412739, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.03631955695648988, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8159722222222222, "eval_signal/frontier_coverage_15/group_std_mean": 0.05081744554142157, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003631955711171031, "eval_signal/frontier_coverage_15/weight": 0.10000000149011612, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003631955711171031, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.021420936100184917, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8229166666666666, "eval_signal/frontier_coverage_20/group_std_mean": 0.029625747663279373, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021420938040440283, "eval_signal/frontier_coverage_20/weight": 0.10000000149011612, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021420938040440283, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.011142984808733067, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222222, "eval_signal/frontier_coverage_25/group_std_mean": 0.0158317390208443, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001114298482813562, "eval_signal/frontier_coverage_25/weight": 0.10000000149011612, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001114298482813562, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.050127786894639335, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8090277777777778, "eval_signal/frontier_coverage_5/group_std_mean": 0.07031712743143241, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005012778720508019, "eval_signal/frontier_coverage_5/weight": 0.10000000149011612, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005012778720508019, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0016588448003555338, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.6215277777777778, "eval_signal/frontier_ece_reward/group_std_mean": 0.002777168876491487, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0001658844824608726, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0001658844824608726, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.063, "step": 200 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.11455553726240084, "calibration/batch_entropy_100bins": 0.45068975035694436, "calibration/batch_entropy_10bins": 0.11455553726240084, "calibration/batch_entropy_50bins": 0.35838251002340216, "calibration/batch_uniqueness": 0.5393663194444445, "calibration/buffer_distribution_entropy": 0.2649266196607571, "calibration/buffer_entropy_100bins": 0.5311263627702383, "calibration/buffer_entropy_10bins": 0.2649266196607571, "calibration/buffer_entropy_50bins": 0.45441417672910356, "calibration/confidence_entropy": 0.11045526044395729, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.029426482313059867, "calibration/mean_confidence": 0.029426482313059867, "calibration/prompt_uniqueness": 0.492578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0002604166666666741, "completions/max_length": 2353.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 679.0311645507812, "completions/mean_terminated_length": 679.208349609375, "completions/min_length": 122.0, "completions/min_terminated_length": 189.2, "epoch": 0.491993850076874, "grad_norm": 5.876172872376628e-05, "learning_rate": 9.036144578313253e-08, "loss": -0.0004, "num_tokens": 439205459.0, "reward": 1.0090928316116332, "reward_std": 0.017295997962355612, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9972542285919189, "rewards/confidence_uniqueness_reward": 0.5266831457614899, "rewards/format_reward": 0.9996527671813965, "rewards/frontier_aurc_reward": -0.007521875947713852, "rewards/frontier_coverage_0": 0.9413968443870544, "rewards/frontier_coverage_1": 0.9413968443870544, "rewards/frontier_coverage_10": 0.8523622274398803, "rewards/frontier_coverage_15": 0.5419311404228211, "rewards/frontier_coverage_20": 0.21406979262828826, "rewards/frontier_coverage_25": 0.061385908722877504, "rewards/frontier_coverage_5": 0.9381889939308167, "rewards/frontier_ece_reward": -0.001526193623431027, "rewards/frontier_entropy_batch_reward": -0.9195385575294495, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010584606975317, "signal/advantage_pre_scale_abs_mean": 0.010584606975317, "signal/advantage_pre_scale_std": 0.023541892506182195, "signal/advantage_std": 0.023541892506182195, "signal/brier_reward/centered_abs_mean": 0.0038198365829885004, "signal/brier_reward/group_bin_occupancy": 0.6184027777777777, "signal/brier_reward/group_std_mean": 0.006863884162157774, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003819836885668337, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0003819836885668337, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20393891334533693, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8586805555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.23769011199474335, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020393891260027885, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020393891260027885, "signal/format_reward/centered_abs_mean": 0.0006618923507630825, "signal/format_reward/group_bin_occupancy": 0.12604166666666666, "signal/format_reward/group_std_mean": 0.0016652445774525404, "signal/format_reward/group_zero_std_frac": 0.9916666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00033094617538154125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00033094617538154125, "signal/frontier_aurc_reward/centered_abs_mean": 3.8273249083431436e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.648263888888889, "signal/frontier_aurc_reward/group_std_mean": 6.441681398428045e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.784156260484451e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.784156260484451e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.05274165198206902, "signal/frontier_coverage_0/group_bin_occupancy": 0.8149305555555555, "signal/frontier_coverage_0/group_std_mean": 0.07166333943605423, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005274165514856577, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005274165514856577, "signal/frontier_coverage_1/centered_abs_mean": 0.05274165198206902, "signal/frontier_coverage_1/group_bin_occupancy": 0.8149305555555555, "signal/frontier_coverage_1/group_std_mean": 0.07166333943605423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005274165514856577, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005274165514856577, "signal/frontier_coverage_10/centered_abs_mean": 0.04995769932866097, "signal/frontier_coverage_10/group_bin_occupancy": 0.815625, "signal/frontier_coverage_10/group_std_mean": 0.06778565198183059, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004995770007371903, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004995770007371903, "signal/frontier_coverage_15/centered_abs_mean": 0.03851696029305458, "signal/frontier_coverage_15/group_bin_occupancy": 0.8184027777777777, "signal/frontier_coverage_15/group_std_mean": 0.051988587528467176, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038516961503773928, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038516961503773928, "signal/frontier_coverage_20/centered_abs_mean": 0.022696791961789132, "signal/frontier_coverage_20/group_bin_occupancy": 0.8371527777777779, "signal/frontier_coverage_20/group_std_mean": 0.03020997978746891, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002269679168239236, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002269679168239236, "signal/frontier_coverage_25/centered_abs_mean": 0.011402542889118194, "signal/frontier_coverage_25/group_bin_occupancy": 0.8784722222222223, "signal/frontier_coverage_25/group_std_mean": 0.0147995101287961, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011402543168514967, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011402543168514967, "signal/frontier_coverage_5/centered_abs_mean": 0.052644898742437364, "signal/frontier_coverage_5/group_bin_occupancy": 0.8149305555555555, "signal/frontier_coverage_5/group_std_mean": 0.07152151316404343, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005264490097761154, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005264490097761154, "signal/frontier_ece_reward/centered_abs_mean": 0.0019458664581179618, "signal/frontier_ece_reward/group_bin_occupancy": 0.6496527777777777, "signal/frontier_ece_reward/group_std_mean": 0.0029517014976590873, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00019458665046840907, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00019458665046840907, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14009268283843995, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3819444444444445, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24936519265174867, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07777777910232545, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014009268768131734, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014009268768131734, "step": 205 }, { "calibration/aurc": 1.0, "calibration/batch_distribution_entropy": 0.11726611228589778, "calibration/batch_entropy_100bins": 0.43786275906834105, "calibration/batch_entropy_10bins": 0.11726611228589778, "calibration/batch_entropy_50bins": 0.3445128998256206, "calibration/batch_uniqueness": 0.5125325520833334, "calibration/buffer_distribution_entropy": 0.21560621581407732, "calibration/buffer_entropy_100bins": 0.5033980975058389, "calibration/buffer_entropy_10bins": 0.21560621581407732, "calibration/buffer_entropy_50bins": 0.42178915870920547, "calibration/confidence_entropy": 0.10638714985174476, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.028447378988646913, "calibration/mean_confidence": 0.028447378988646913, "calibration/prompt_uniqueness": 0.46137152777777773, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0002893518518518601, "completions/max_length": 2120.6666666666665, "completions/max_terminated_length": 2120.6666666666665, "completions/mean_length": 666.4303995768229, "completions/mean_terminated_length": 666.6202799479166, "completions/min_length": 67.0, "completions/min_terminated_length": 170.0, "epoch": 0.49919376007799904, "num_tokens": 445677874.0, "reward": 0.9926110506057739, "reward_std": 0.01812468096613884, "rewards/accuracy_reward": 0.0, "rewards/brier_reward": 0.9972220460573832, "rewards/confidence_uniqueness_reward": 0.5229232708613077, "rewards/format_reward": 0.9997106393178304, "rewards/frontier_aurc_reward": -0.007819035245726505, "rewards/frontier_coverage_0": 0.9409451087315878, "rewards/frontier_coverage_1": 0.9409451087315878, "rewards/frontier_coverage_10": 0.7893781860669454, "rewards/frontier_coverage_15": 0.5073119203249613, "rewards/frontier_coverage_20": 0.18051361044247946, "rewards/frontier_coverage_25": 0.049011316150426865, "rewards/frontier_coverage_5": 0.9196064670880636, "rewards/frontier_ece_reward": -0.0018021255576362212, "rewards/frontier_entropy_batch_reward": -0.9175194899241129, "signal/accuracy_reward/centered_abs_mean": 0.0, "signal/accuracy_reward/group_bin_occupancy": 0.125, "signal/accuracy_reward/group_std_mean": 0.0, "signal/accuracy_reward/group_zero_std_frac": 1.0, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0, "signal/advantage_abs_mean": 0.010793880249063173, "signal/advantage_pre_scale_abs_mean": 0.010793880249063173, "signal/advantage_pre_scale_std": 0.023850775013367336, "signal/advantage_std": 0.023850775013367336, "signal/brier_reward/centered_abs_mean": 0.0038659116253256798, "signal/brier_reward/group_bin_occupancy": 0.6278935185185185, "signal/brier_reward/group_std_mean": 0.0070017667797704535, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003865911761143555, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0003865911761143555, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21364787220954895, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.828125, "signal/confidence_uniqueness_reward/group_std_mean": 0.24747528632481894, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021364788214365642, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021364788214365642, "signal/format_reward/centered_abs_mean": 0.0005606192086512843, "signal/format_reward/group_bin_occupancy": 0.1261574074074074, "signal/format_reward/group_std_mean": 0.0016368211557467778, "signal/format_reward/group_zero_std_frac": 0.9907407363255819, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00028030960432564217, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00028030960432564217, "signal/frontier_aurc_reward/centered_abs_mean": 3.1314588947376855e-05, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6168981481481483, "signal/frontier_aurc_reward/group_std_mean": 5.595714052712234e-05, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.914323466839657e-07, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.914323466839657e-07, "signal/frontier_coverage_0/centered_abs_mean": 0.05400566880901655, "signal/frontier_coverage_0/group_bin_occupancy": 0.8234953703703702, "signal/frontier_coverage_0/group_std_mean": 0.07307405769824982, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005400567160298427, "signal/frontier_coverage_0/weight": 0.10000000149011612, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005400567160298427, "signal/frontier_coverage_1/centered_abs_mean": 0.05400566880901655, "signal/frontier_coverage_1/group_bin_occupancy": 0.8234953703703702, "signal/frontier_coverage_1/group_std_mean": 0.07307405769824982, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005400567160298427, "signal/frontier_coverage_1/weight": 0.10000000149011612, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005400567160298427, "signal/frontier_coverage_10/centered_abs_mean": 0.048948156336943306, "signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777778, "signal/frontier_coverage_10/group_std_mean": 0.06624157354235649, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004894815851002932, "signal/frontier_coverage_10/weight": 0.10000000149011612, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004894815851002932, "signal/frontier_coverage_15/centered_abs_mean": 0.037971017261346184, "signal/frontier_coverage_15/group_bin_occupancy": 0.8252314814814815, "signal/frontier_coverage_15/group_std_mean": 0.05115088944633802, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003797101710612575, "signal/frontier_coverage_15/weight": 0.10000000149011612, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003797101710612575, "signal/frontier_coverage_20/centered_abs_mean": 0.021584200983246166, "signal/frontier_coverage_20/group_bin_occupancy": 0.8385416666666666, "signal/frontier_coverage_20/group_std_mean": 0.02855382238825162, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021584201992178955, "signal/frontier_coverage_20/weight": 0.10000000149011612, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021584201992178955, "signal/frontier_coverage_25/centered_abs_mean": 0.010639886682232222, "signal/frontier_coverage_25/group_bin_occupancy": 0.8790509259259259, "signal/frontier_coverage_25/group_std_mean": 0.013642584905028343, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010639886216570933, "signal/frontier_coverage_25/weight": 0.10000000149011612, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010639886216570933, "signal/frontier_coverage_5/centered_abs_mean": 0.05342509597539902, "signal/frontier_coverage_5/group_bin_occupancy": 0.8234953703703702, "signal/frontier_coverage_5/group_std_mean": 0.07227163016796112, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005342509442319472, "signal/frontier_coverage_5/weight": 0.10000000149011612, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005342509442319472, "signal/frontier_ece_reward/centered_abs_mean": 0.002326072504123052, "signal/frontier_ece_reward/group_bin_occupancy": 0.6481481481481483, "signal/frontier_ece_reward/group_std_mean": 0.003536020793641607, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00023260726690447578, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00023260726690447578, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14257381359736124, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3755787037037037, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24763726194699606, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0879629651705424, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014257381359736124, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014257381359736124, "step": 208, "total_flos": 0.0, "train_loss": -0.004933846771494315, "train_runtime": 36324.5396, "train_samples_per_second": 0.413, "train_steps_per_second": 0.006 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 445677874, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }