Files
RLCR-v4-ks-bins100-hotpot/trainer_state.json
ModelHub XC 17885c18fd 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-bins100-hotpot
Source: Original Platform
2026-04-11 03:04:58 +08:00

9940 lines
610 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6303003664973054,
"calibration/batch_distribution_entropy": 0.6597844034096975,
"calibration/batch_entropy_100bins": 0.4873712088023803,
"calibration/batch_entropy_10bins": 0.6597844034096975,
"calibration/batch_entropy_50bins": 0.5698783479346161,
"calibration/batch_uniqueness": 0.7280114042860303,
"calibration/confidence_entropy": 0.34552134095532827,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.502710703849331,
"calibration/mean_confidence": 0.7903732547788247,
"calibration/prompt_uniqueness": 0.5916048209774899,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03564453125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1493.6,
"completions/mean_length": 272.20166015625,
"completions/mean_terminated_length": 225.48247375488282,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.03546224907040596,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.069,
"num_tokens": 17631377.0,
"reward": 0.49061959981918335,
"reward_std": 0.39449103474617003,
"rewards/accuracy_reward": 0.21728515625,
"rewards/brier_reward": 0.3733718991279602,
"rewards/format_reward": 0.67998046875,
"rewards/frontier_aurc_reward": 0.30710798501968384,
"rewards/frontier_coverage_1": 0.30710798501968384,
"rewards/frontier_coverage_10": 0.30710798501968384,
"rewards/frontier_coverage_15": 0.30710798501968384,
"rewards/frontier_coverage_20": 0.30710798501968384,
"rewards/frontier_coverage_25": 0.30710798501968384,
"rewards/frontier_coverage_5": 0.30710798501968384,
"rewards/frontier_ece_reward": 0.30710798501968384,
"rewards/frontier_entropy_batch_reward": -0.6524304747581482,
"signal/accuracy_reward/centered_abs_mean": 0.238629150390625,
"signal/accuracy_reward/group_bin_occupancy": 0.21015625,
"signal/accuracy_reward/group_std_mean": 0.28090503215789797,
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1193145751953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1193145751953125,
"signal/advantage_abs_mean": 0.3349871218204498,
"signal/advantage_pre_scale_abs_mean": 0.3349871218204498,
"signal/advantage_pre_scale_std": 0.40826708674430845,
"signal/advantage_std": 0.40826708674430845,
"signal/brier_reward/centered_abs_mean": 0.31911089420318606,
"signal/brier_reward/group_bin_occupancy": 0.751171875,
"signal/brier_reward/group_std_mean": 0.36441142559051515,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03988886177539826,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03988886177539826,
"signal/format_reward/centered_abs_mean": 0.404473876953125,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.4542974352836609,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2022369384765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.2022369384765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2967922270298004,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.662109375,
"signal/frontier_aurc_reward/group_std_mean": 0.34727140665054324,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_1/centered_abs_mean": 0.2967922270298004,
"signal/frontier_coverage_1/group_bin_occupancy": 0.662109375,
"signal/frontier_coverage_1/group_std_mean": 0.34727140665054324,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_10/centered_abs_mean": 0.2967922270298004,
"signal/frontier_coverage_10/group_bin_occupancy": 0.662109375,
"signal/frontier_coverage_10/group_std_mean": 0.34727140665054324,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_15/centered_abs_mean": 0.2967922270298004,
"signal/frontier_coverage_15/group_bin_occupancy": 0.662109375,
"signal/frontier_coverage_15/group_std_mean": 0.34727140665054324,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_20/centered_abs_mean": 0.2967922270298004,
"signal/frontier_coverage_20/group_bin_occupancy": 0.662109375,
"signal/frontier_coverage_20/group_std_mean": 0.34727140665054324,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_25/centered_abs_mean": 0.2967922270298004,
"signal/frontier_coverage_25/group_bin_occupancy": 0.662109375,
"signal/frontier_coverage_25/group_std_mean": 0.34727140665054324,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_5/centered_abs_mean": 0.2967922270298004,
"signal/frontier_coverage_5/group_bin_occupancy": 0.662109375,
"signal/frontier_coverage_5/group_std_mean": 0.34727140665054324,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005312580615282059,
"signal/frontier_ece_reward/centered_abs_mean": 0.2967922270298004,
"signal/frontier_ece_reward/group_bin_occupancy": 0.662109375,
"signal/frontier_ece_reward/group_std_mean": 0.34727140665054324,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03709902837872505,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03709902837872505,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42547852396965025,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.47201172113418577,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05318481549620628,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05318481549620628,
"step": 5
},
{
"calibration/aurc": 0.6822839581909947,
"calibration/batch_distribution_entropy": 0.6427950052801196,
"calibration/batch_entropy_100bins": 0.47864642318075556,
"calibration/batch_entropy_10bins": 0.6427950052801196,
"calibration/batch_entropy_50bins": 0.5601287104037039,
"calibration/batch_uniqueness": 0.7195723234061977,
"calibration/confidence_entropy": 0.3457734196325082,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5269429998220361,
"calibration/mean_confidence": 0.7938687058643216,
"calibration/prompt_uniqueness": 0.6126342076368461,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03427734375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1488.2,
"completions/mean_length": 255.5763671875,
"completions/mean_terminated_length": 210.13367309570313,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.017373288050293922,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0672,
"num_tokens": 35348831.0,
"reward": 0.5113436818122864,
"reward_std": 0.37235715985298157,
"rewards/accuracy_reward": 0.21552734375,
"rewards/brier_reward": 0.38588611483573915,
"rewards/format_reward": 0.733984375,
"rewards/frontier_aurc_reward": 0.3045971155166626,
"rewards/frontier_coverage_1": 0.3045971155166626,
"rewards/frontier_coverage_10": 0.3045971155166626,
"rewards/frontier_coverage_15": 0.3045971155166626,
"rewards/frontier_coverage_20": 0.3045971155166626,
"rewards/frontier_coverage_25": 0.3045971155166626,
"rewards/frontier_coverage_5": 0.3045971155166626,
"rewards/frontier_ece_reward": 0.3045971155166626,
"rewards/frontier_entropy_batch_reward": -0.7031086683273315,
"signal/accuracy_reward/centered_abs_mean": 0.225225830078125,
"signal/accuracy_reward/group_bin_occupancy": 0.209765625,
"signal/accuracy_reward/group_std_mean": 0.2710058391094208,
"signal/accuracy_reward/group_zero_std_frac": 0.321875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1126129150390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1126129150390625,
"signal/advantage_abs_mean": 0.30710036158561704,
"signal/advantage_pre_scale_abs_mean": 0.30710036158561704,
"signal/advantage_pre_scale_std": 0.3872555077075958,
"signal/advantage_std": 0.3872555077075958,
"signal/brier_reward/centered_abs_mean": 0.3064376533031464,
"signal/brier_reward/group_bin_occupancy": 0.766796875,
"signal/brier_reward/group_std_mean": 0.35422558784484864,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0383047066628933,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0383047066628933,
"signal/format_reward/centered_abs_mean": 0.36297607421875,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.4295056998729706,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.181488037109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.181488037109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.27946594953536985,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6734375,
"signal/frontier_aurc_reward/group_std_mean": 0.33357922434806825,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_1/centered_abs_mean": 0.27946594953536985,
"signal/frontier_coverage_1/group_bin_occupancy": 0.6734375,
"signal/frontier_coverage_1/group_std_mean": 0.33357922434806825,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_10/centered_abs_mean": 0.27946594953536985,
"signal/frontier_coverage_10/group_bin_occupancy": 0.6734375,
"signal/frontier_coverage_10/group_std_mean": 0.33357922434806825,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_15/centered_abs_mean": 0.27946594953536985,
"signal/frontier_coverage_15/group_bin_occupancy": 0.6734375,
"signal/frontier_coverage_15/group_std_mean": 0.33357922434806825,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_20/centered_abs_mean": 0.27946594953536985,
"signal/frontier_coverage_20/group_bin_occupancy": 0.6734375,
"signal/frontier_coverage_20/group_std_mean": 0.33357922434806825,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_25/centered_abs_mean": 0.27946594953536985,
"signal/frontier_coverage_25/group_bin_occupancy": 0.6734375,
"signal/frontier_coverage_25/group_std_mean": 0.33357922434806825,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_5/centered_abs_mean": 0.27946594953536985,
"signal/frontier_coverage_5/group_bin_occupancy": 0.6734375,
"signal/frontier_coverage_5/group_std_mean": 0.33357922434806825,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005002440419048071,
"signal/frontier_ece_reward/centered_abs_mean": 0.27946594953536985,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6734375,
"signal/frontier_ece_reward/group_std_mean": 0.33357922434806825,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03493324369192123,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03493324369192123,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39019296765327455,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.31015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.45074942111968996,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04877412095665932,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04877412095665932,
"step": 10
},
{
"calibration/aurc": 0.5998491425462407,
"calibration/batch_distribution_entropy": 0.645822968664817,
"calibration/batch_entropy_100bins": 0.4777453417155669,
"calibration/batch_entropy_10bins": 0.645822968664817,
"calibration/batch_entropy_50bins": 0.5601469875575775,
"calibration/batch_uniqueness": 0.706415871160267,
"calibration/buffer_distribution_entropy": 0.658197276088968,
"calibration/buffer_entropy_100bins": 0.49091085619434366,
"calibration/buffer_entropy_10bins": 0.658197276088968,
"calibration/buffer_entropy_50bins": 0.5740145738778557,
"calibration/confidence_entropy": 0.3450713866685877,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.47860442442795315,
"calibration/mean_confidence": 0.8054640124402823,
"calibration/prompt_uniqueness": 0.6039787026135109,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01513671875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1480.2,
"completions/mean_length": 199.25283203125,
"completions/mean_terminated_length": 178.8089813232422,
"completions/min_length": 13.6,
"completions/min_terminated_length": 13.6,
"epoch": 0.048,
"grad_norm": 0.013462609611451626,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0458,
"num_tokens": 52437916.0,
"reward": 0.6119109511375427,
"reward_std": 0.2940455138683319,
"rewards/accuracy_reward": 0.2783203125,
"rewards/brier_reward": 0.492095947265625,
"rewards/format_reward": 0.889453125,
"rewards/frontier_aurc_reward": 0.2877124358899891,
"rewards/frontier_coverage_1": 0.3070936232805252,
"rewards/frontier_coverage_10": 0.3070936232805252,
"rewards/frontier_coverage_15": 0.3070936232805252,
"rewards/frontier_coverage_20": 0.3070936232805252,
"rewards/frontier_coverage_25": 0.3070936232805252,
"rewards/frontier_coverage_5": 0.3070936232805252,
"rewards/frontier_ece_reward": 0.2710249736905098,
"rewards/frontier_entropy_batch_reward": -0.8439823746681213,
"signal/accuracy_reward/centered_abs_mean": 0.19747314453125,
"signal/accuracy_reward/group_bin_occupancy": 0.203515625,
"signal/accuracy_reward/group_std_mean": 0.2440613567829132,
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.098736572265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.098736572265625,
"signal/advantage_abs_mean": 0.2279714286327362,
"signal/advantage_pre_scale_abs_mean": 0.2279714286327362,
"signal/advantage_pre_scale_std": 0.31036766767501833,
"signal/advantage_std": 0.31036766767501833,
"signal/brier_reward/centered_abs_mean": 0.272215747833252,
"signal/brier_reward/group_bin_occupancy": 0.807421875,
"signal/brier_reward/group_std_mean": 0.32673339247703553,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0340269684791565,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0340269684791565,
"signal/format_reward/centered_abs_mean": 0.18416748046875,
"signal/format_reward/group_bin_occupancy": 0.240625,
"signal/format_reward/group_std_mean": 0.28515345454216,
"signal/format_reward/group_zero_std_frac": 0.075,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.092083740234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.092083740234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.20810934910550713,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728125,
"signal/frontier_aurc_reward/group_std_mean": 0.24877500906586647,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003725157254666556,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003725157254666556,
"signal/frontier_coverage_1/centered_abs_mean": 0.23256531208753586,
"signal/frontier_coverage_1/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_1/group_std_mean": 0.28642610311508176,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_10/centered_abs_mean": 0.23256531208753586,
"signal/frontier_coverage_10/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_10/group_std_mean": 0.28642610311508176,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_15/centered_abs_mean": 0.23256531208753586,
"signal/frontier_coverage_15/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_15/group_std_mean": 0.28642610311508176,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_20/centered_abs_mean": 0.23256531208753586,
"signal/frontier_coverage_20/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_20/group_std_mean": 0.28642610311508176,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_25/centered_abs_mean": 0.23256531208753586,
"signal/frontier_coverage_25/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_25/group_std_mean": 0.28642610311508176,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_5/centered_abs_mean": 0.23256531208753586,
"signal/frontier_coverage_5/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_5/group_std_mean": 0.28642610311508176,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004162919009104371,
"signal/frontier_ece_reward/centered_abs_mean": 0.23592609018087388,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6890625,
"signal/frontier_ece_reward/group_std_mean": 0.2841936469078064,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029490761272609235,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029490761272609235,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24894185066223146,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.319921875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35590378642082215,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0375,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031117731332778932,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031117731332778932,
"step": 15
},
{
"calibration/aurc": 0.5341530098263937,
"calibration/batch_distribution_entropy": 0.6953781856593181,
"calibration/batch_entropy_100bins": 0.5009968781781217,
"calibration/batch_entropy_10bins": 0.6953781856593181,
"calibration/batch_entropy_50bins": 0.5862452573775565,
"calibration/batch_uniqueness": 0.7366723501935544,
"calibration/buffer_distribution_entropy": 0.6570677642554361,
"calibration/buffer_entropy_100bins": 0.49046116304887866,
"calibration/buffer_entropy_10bins": 0.6570677642554361,
"calibration/buffer_entropy_50bins": 0.5733484379340844,
"calibration/confidence_entropy": 0.36679942254955256,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.37407708422530783,
"calibration/mean_confidence": 0.7776747983528602,
"calibration/prompt_uniqueness": 0.6546207061865514,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005078125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1265.4,
"completions/mean_length": 143.1140625,
"completions/mean_terminated_length": 136.00784912109376,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"epoch": 0.064,
"grad_norm": 0.004953299183398485,
"learning_rate": 1e-06,
"loss": 0.0132,
"num_tokens": 68821804.0,
"reward": 0.6139556050300599,
"reward_std": 0.2074872225522995,
"rewards/accuracy_reward": 0.339453125,
"rewards/brier_reward": 0.5684828639030457,
"rewards/format_reward": 0.97431640625,
"rewards/frontier_aurc_reward": -0.006926297210156918,
"rewards/frontier_coverage_1": 0.06462682336568833,
"rewards/frontier_coverage_10": 0.06462682336568833,
"rewards/frontier_coverage_15": 0.06462682336568833,
"rewards/frontier_coverage_20": 0.06462682336568833,
"rewards/frontier_coverage_25": 0.06462682336568833,
"rewards/frontier_coverage_5": 0.06462682336568833,
"rewards/frontier_ece_reward": -0.057490382343530655,
"rewards/frontier_entropy_batch_reward": -0.9089613318443298,
"signal/accuracy_reward/centered_abs_mean": 0.19891357421875,
"signal/accuracy_reward/group_bin_occupancy": 0.206640625,
"signal/accuracy_reward/group_std_mean": 0.24994130730628966,
"signal/accuracy_reward/group_zero_std_frac": 0.346875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099456787109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.099456787109375,
"signal/advantage_abs_mean": 0.1587139695882797,
"signal/advantage_pre_scale_abs_mean": 0.1587139695882797,
"signal/advantage_pre_scale_std": 0.22378909289836885,
"signal/advantage_std": 0.22378909289836885,
"signal/brier_reward/centered_abs_mean": 0.24612878561019896,
"signal/brier_reward/group_bin_occupancy": 0.83125,
"signal/brier_reward/group_std_mean": 0.30340049862861634,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03076609820127487,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03076609820127487,
"signal/format_reward/centered_abs_mean": 0.048358154296875,
"signal/format_reward/group_bin_occupancy": 0.19296875,
"signal/format_reward/group_std_mean": 0.11387113332748414,
"signal/format_reward/group_zero_std_frac": 0.45625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0241790771484375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0241790771484375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005050728749483824,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74140625,
"signal/frontier_aurc_reward/group_std_mean": 0.006901584379374981,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.040803997777403e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.040803997777403e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1083696573972702,
"signal/frontier_coverage_1/group_bin_occupancy": 0.669921875,
"signal/frontier_coverage_1/group_std_mean": 0.17034714818000793,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_10/centered_abs_mean": 0.1083696573972702,
"signal/frontier_coverage_10/group_bin_occupancy": 0.669921875,
"signal/frontier_coverage_10/group_std_mean": 0.17034714818000793,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_15/centered_abs_mean": 0.1083696573972702,
"signal/frontier_coverage_15/group_bin_occupancy": 0.669921875,
"signal/frontier_coverage_15/group_std_mean": 0.17034714818000793,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_20/centered_abs_mean": 0.1083696573972702,
"signal/frontier_coverage_20/group_bin_occupancy": 0.669921875,
"signal/frontier_coverage_20/group_std_mean": 0.17034714818000793,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_25/centered_abs_mean": 0.1083696573972702,
"signal/frontier_coverage_25/group_bin_occupancy": 0.669921875,
"signal/frontier_coverage_25/group_std_mean": 0.17034714818000793,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_5/centered_abs_mean": 0.1083696573972702,
"signal/frontier_coverage_5/group_bin_occupancy": 0.669921875,
"signal/frontier_coverage_5/group_std_mean": 0.17034714818000793,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019398168195039033,
"signal/frontier_ece_reward/centered_abs_mean": 0.12835136353969573,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6828125,
"signal/frontier_ece_reward/group_std_mean": 0.1587873101234436,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.016043920442461966,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.016043920442461966,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1584494709968567,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34140625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.27752745449543,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1125,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019806183874607086,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019806183874607086,
"step": 20
},
{
"calibration/aurc": 0.6123685825831842,
"calibration/batch_distribution_entropy": 0.7943765238910869,
"calibration/batch_entropy_100bins": 0.5589816454054507,
"calibration/batch_entropy_10bins": 0.7943765238910869,
"calibration/batch_entropy_50bins": 0.6484498837772973,
"calibration/batch_uniqueness": 0.8013406362144699,
"calibration/buffer_distribution_entropy": 0.6846900609897967,
"calibration/buffer_entropy_100bins": 0.5049285356703311,
"calibration/buffer_entropy_10bins": 0.6846900609897967,
"calibration/buffer_entropy_50bins": 0.5890832546755681,
"calibration/confidence_entropy": 0.4306898105190983,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.40301083672013965,
"calibration/mean_confidence": 0.7135340481523055,
"calibration/prompt_uniqueness": 0.7293804507629489,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00224609375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1015.4,
"completions/mean_length": 125.69091796875,
"completions/mean_terminated_length": 122.5162124633789,
"completions/min_length": 38.2,
"completions/min_terminated_length": 38.2,
"epoch": 0.08,
"grad_norm": 0.01374911330640316,
"learning_rate": 1e-06,
"loss": 0.0039,
"num_tokens": 85042031.0,
"reward": 0.6419196009635926,
"reward_std": 0.18055281639099122,
"rewards/accuracy_reward": 0.35390625,
"rewards/brier_reward": 0.6228940486907959,
"rewards/format_reward": 0.9931640625,
"rewards/frontier_aurc_reward": -0.005947516486048699,
"rewards/frontier_coverage_1": 0.07641823142766953,
"rewards/frontier_coverage_10": 0.07641823142766953,
"rewards/frontier_coverage_15": 0.07641823142766953,
"rewards/frontier_coverage_20": 0.07641823142766953,
"rewards/frontier_coverage_25": 0.07641823142766953,
"rewards/frontier_coverage_5": 0.07641823142766953,
"rewards/frontier_ece_reward": -0.0432446762919426,
"rewards/frontier_entropy_batch_reward": -0.8973807573318482,
"signal/accuracy_reward/centered_abs_mean": 0.18575439453125,
"signal/accuracy_reward/group_bin_occupancy": 0.205078125,
"signal/accuracy_reward/group_std_mean": 0.23675628304481505,
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092877197265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.092877197265625,
"signal/advantage_abs_mean": 0.13976745307445526,
"signal/advantage_pre_scale_abs_mean": 0.13976745307445526,
"signal/advantage_pre_scale_std": 0.1967965304851532,
"signal/advantage_std": 0.1967965304851532,
"signal/brier_reward/centered_abs_mean": 0.22795205116271972,
"signal/brier_reward/group_bin_occupancy": 0.8578125,
"signal/brier_reward/group_std_mean": 0.28223063349723815,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028494006395339964,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.028494006395339964,
"signal/format_reward/centered_abs_mean": 0.01312255859375,
"signal/format_reward/group_bin_occupancy": 0.148828125,
"signal/format_reward/group_std_mean": 0.03558391332626343,
"signal/format_reward/group_zero_std_frac": 0.809375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0038825439289212225,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625,
"signal/frontier_aurc_reward/group_std_mean": 0.005549946706742049,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.949753442313522e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.949753442313522e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14139218479394913,
"signal/frontier_coverage_1/group_bin_occupancy": 0.740625,
"signal/frontier_coverage_1/group_std_mean": 0.2093652755022049,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_10/centered_abs_mean": 0.14139218479394913,
"signal/frontier_coverage_10/group_bin_occupancy": 0.740625,
"signal/frontier_coverage_10/group_std_mean": 0.2093652755022049,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_15/centered_abs_mean": 0.14139218479394913,
"signal/frontier_coverage_15/group_bin_occupancy": 0.740625,
"signal/frontier_coverage_15/group_std_mean": 0.2093652755022049,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_20/centered_abs_mean": 0.14139218479394913,
"signal/frontier_coverage_20/group_bin_occupancy": 0.740625,
"signal/frontier_coverage_20/group_std_mean": 0.2093652755022049,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_25/centered_abs_mean": 0.14139218479394913,
"signal/frontier_coverage_25/group_bin_occupancy": 0.740625,
"signal/frontier_coverage_25/group_std_mean": 0.2093652755022049,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_5/centered_abs_mean": 0.14139218479394913,
"signal/frontier_coverage_5/group_bin_occupancy": 0.740625,
"signal/frontier_coverage_5/group_std_mean": 0.2093652755022049,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025309198535978794,
"signal/frontier_ece_reward/centered_abs_mean": 0.1143453910946846,
"signal/frontier_ece_reward/group_bin_occupancy": 0.75546875,
"signal/frontier_ece_reward/group_std_mean": 0.1409228652715683,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014293173886835575,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014293173886835575,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1770955890417099,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3114177048206329,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02213694863021374,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02213694863021374,
"step": 25
},
{
"calibration/aurc": 0.6187899429468231,
"calibration/batch_distribution_entropy": 0.903264681119443,
"calibration/batch_entropy_100bins": 0.6551592063501281,
"calibration/batch_entropy_10bins": 0.903264681119443,
"calibration/batch_entropy_50bins": 0.7520692317813698,
"calibration/batch_uniqueness": 0.8607905341900063,
"calibration/buffer_distribution_entropy": 0.7327899214780806,
"calibration/buffer_entropy_100bins": 0.5348155537313921,
"calibration/buffer_entropy_10bins": 0.7327899214780806,
"calibration/buffer_entropy_50bins": 0.6214034080313888,
"calibration/confidence_entropy": 0.4955790028408361,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.03137254901960784,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.29642104394376234,
"calibration/mean_confidence": 0.585974183555353,
"calibration/prompt_uniqueness": 0.7915110704181842,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 762.6,
"completions/mean_length": 124.0724609375,
"completions/mean_terminated_length": 122.8305679321289,
"completions/min_length": 37.2,
"completions/min_terminated_length": 37.2,
"epoch": 0.096,
"grad_norm": 0.004074463155120611,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 101357141.0,
"reward": 0.6705445170402526,
"reward_std": 0.16784824728965758,
"rewards/accuracy_reward": 0.36572265625,
"rewards/brier_reward": 0.6802791714668274,
"rewards/format_reward": 0.9966796875,
"rewards/frontier_aurc_reward": -0.005298045370727778,
"rewards/frontier_coverage_1": 0.10815906524658203,
"rewards/frontier_coverage_10": 0.10815906524658203,
"rewards/frontier_coverage_15": 0.10815906524658203,
"rewards/frontier_coverage_20": 0.10815906524658203,
"rewards/frontier_coverage_25": 0.10815906524658203,
"rewards/frontier_coverage_5": 0.10815906524658203,
"rewards/frontier_ece_reward": -0.02196125448681414,
"rewards/frontier_entropy_batch_reward": -0.8357429265975952,
"signal/accuracy_reward/centered_abs_mean": 0.182061767578125,
"signal/accuracy_reward/group_bin_occupancy": 0.204296875,
"signal/accuracy_reward/group_std_mean": 0.23303503692150115,
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0910308837890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0910308837890625,
"signal/advantage_abs_mean": 0.13158320784568786,
"signal/advantage_pre_scale_abs_mean": 0.13158320784568786,
"signal/advantage_pre_scale_std": 0.18012692034244537,
"signal/advantage_std": 0.18012692034244537,
"signal/brier_reward/centered_abs_mean": 0.22275688350200654,
"signal/brier_reward/group_bin_occupancy": 0.885546875,
"signal/brier_reward/group_std_mean": 0.27452688217163085,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027844610437750817,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.027844610437750817,
"signal/format_reward/centered_abs_mean": 0.00640869140625,
"signal/format_reward/group_bin_occupancy": 0.1375,
"signal/format_reward/group_std_mean": 0.018109906651079654,
"signal/format_reward/group_zero_std_frac": 0.9,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003204345703125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003204345703125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029932684265077114,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.705078125,
"signal/frontier_aurc_reward/group_std_mean": 0.004513154737651348,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.357950285542756e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.357950285542756e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.207365944981575,
"signal/frontier_coverage_1/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_1/group_std_mean": 0.27984519600868224,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_10/centered_abs_mean": 0.207365944981575,
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_10/group_std_mean": 0.27984519600868224,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_15/centered_abs_mean": 0.207365944981575,
"signal/frontier_coverage_15/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_15/group_std_mean": 0.27984519600868224,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_20/centered_abs_mean": 0.207365944981575,
"signal/frontier_coverage_20/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_20/group_std_mean": 0.27984519600868224,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_25/centered_abs_mean": 0.207365944981575,
"signal/frontier_coverage_25/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_25/group_std_mean": 0.27984519600868224,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_5/centered_abs_mean": 0.207365944981575,
"signal/frontier_coverage_5/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_5/group_std_mean": 0.27984519600868224,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037118502892553805,
"signal/frontier_ece_reward/centered_abs_mean": 0.10284390598535538,
"signal/frontier_ece_reward/group_bin_occupancy": 0.791796875,
"signal/frontier_ece_reward/group_std_mean": 0.12563495337963104,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012855488248169422,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012855488248169422,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2653192490339279,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.4625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40386409759521485,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03316490612924099,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03316490612924099,
"step": 30
},
{
"calibration/aurc": 0.46203742605027587,
"calibration/batch_distribution_entropy": 0.9505007499638772,
"calibration/batch_entropy_100bins": 0.8425976231283391,
"calibration/batch_entropy_10bins": 0.9505007499638772,
"calibration/batch_entropy_50bins": 0.8942506391359825,
"calibration/batch_uniqueness": 0.9259175330557399,
"calibration/buffer_distribution_entropy": 0.8045500051104121,
"calibration/buffer_entropy_100bins": 0.5969997504126264,
"calibration/buffer_entropy_10bins": 0.8045500051104121,
"calibration/buffer_entropy_50bins": 0.6849823069476149,
"calibration/confidence_entropy": 0.49676340907411404,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.01875,
"calibration/coverage@20%": 0.026953125,
"calibration/coverage@25%": 0.057421875,
"calibration/coverage@30%": 0.059375,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1438197834007442,
"calibration/mean_confidence": 0.4231594765776811,
"calibration/prompt_uniqueness": 0.8550597826970604,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 1096.2,
"completions/max_terminated_length": 672.8,
"completions/mean_length": 126.68193359375,
"completions/mean_terminated_length": 125.71805114746094,
"completions/min_length": 37.6,
"completions/min_terminated_length": 37.6,
"epoch": 0.112,
"grad_norm": 0.00284270360134542,
"learning_rate": 1e-06,
"loss": 0.0018,
"num_tokens": 117763836.0,
"reward": 0.7385032176971436,
"reward_std": 0.1466519594192505,
"rewards/accuracy_reward": 0.39775390625,
"rewards/brier_reward": 0.7357542634010314,
"rewards/format_reward": 0.99775390625,
"rewards/frontier_aurc_reward": -0.004414942674338817,
"rewards/frontier_coverage_1": 0.14376911520957947,
"rewards/frontier_coverage_10": 0.14376911520957947,
"rewards/frontier_coverage_15": 0.14376911520957947,
"rewards/frontier_coverage_20": 0.14376911520957947,
"rewards/frontier_coverage_25": 0.14376911520957947,
"rewards/frontier_coverage_5": 0.14376911520957947,
"rewards/frontier_ece_reward": 0.004067628551274538,
"rewards/frontier_entropy_batch_reward": -0.5367215931415558,
"signal/accuracy_reward/centered_abs_mean": 0.178826904296875,
"signal/accuracy_reward/group_bin_occupancy": 0.203125,
"signal/accuracy_reward/group_std_mean": 0.22884972691535949,
"signal/accuracy_reward/group_zero_std_frac": 0.375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0894134521484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0894134521484375,
"signal/advantage_abs_mean": 0.11495690047740936,
"signal/advantage_pre_scale_abs_mean": 0.11495690047740936,
"signal/advantage_pre_scale_std": 0.15609990060329437,
"signal/advantage_std": 0.15609990060329437,
"signal/brier_reward/centered_abs_mean": 0.19914465844631196,
"signal/brier_reward/group_bin_occupancy": 0.871484375,
"signal/brier_reward/group_std_mean": 0.25038520991802216,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024893082305788995,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024893082305788995,
"signal/format_reward/centered_abs_mean": 0.004351806640625,
"signal/format_reward/group_bin_occupancy": 0.133984375,
"signal/format_reward/group_std_mean": 0.012705824710428715,
"signal/format_reward/group_zero_std_frac": 0.928125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021759033203125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00161303433123976,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7171875,
"signal/frontier_aurc_reward/group_std_mean": 0.002583282254636288,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8873312840005382e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8873312840005382e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.28870871663093567,
"signal/frontier_coverage_1/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_1/group_std_mean": 0.36129642128944395,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_10/centered_abs_mean": 0.28870871663093567,
"signal/frontier_coverage_10/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_10/group_std_mean": 0.36129642128944395,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_15/centered_abs_mean": 0.28870871663093567,
"signal/frontier_coverage_15/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_15/group_std_mean": 0.36129642128944395,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_20/centered_abs_mean": 0.28870871663093567,
"signal/frontier_coverage_20/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_20/group_std_mean": 0.36129642128944395,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_25/centered_abs_mean": 0.28870871663093567,
"signal/frontier_coverage_25/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_25/group_std_mean": 0.36129642128944395,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_5/centered_abs_mean": 0.28870871663093567,
"signal/frontier_coverage_5/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_5/group_std_mean": 0.36129642128944395,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00516788586974144,
"signal/frontier_ece_reward/centered_abs_mean": 0.06561752930283546,
"signal/frontier_ece_reward/group_bin_occupancy": 0.724609375,
"signal/frontier_ece_reward/group_std_mean": 0.08880508691072464,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008202191162854432,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008202191162854432,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.455366712808609,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.688671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5278913855552674,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05692083910107613,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05692083910107613,
"step": 35
},
{
"calibration/aurc": 0.5524540681481819,
"calibration/batch_distribution_entropy": 0.8926553482625657,
"calibration/batch_entropy_100bins": 0.9168440833945379,
"calibration/batch_entropy_10bins": 0.8926553482625657,
"calibration/batch_entropy_50bins": 0.920951322294291,
"calibration/batch_uniqueness": 0.9371887328581348,
"calibration/buffer_distribution_entropy": 0.8752040807438071,
"calibration/buffer_entropy_100bins": 0.6906934782719981,
"calibration/buffer_entropy_10bins": 0.8752040807438071,
"calibration/buffer_entropy_50bins": 0.7684556478443987,
"calibration/confidence_entropy": 0.47675208932497054,
"calibration/coverage@0%": 0.003919266336671655,
"calibration/coverage@1%": 0.003919266336671655,
"calibration/coverage@10%": 0.006664364375887341,
"calibration/coverage@15%": 0.007840834964122636,
"calibration/coverage@20%": 0.008232991826867733,
"calibration/coverage@25%": 0.018820459690725606,
"calibration/coverage@30%": 0.027445608380338438,
"calibration/coverage@5%": 0.003919266336671655,
"calibration/ece": 0.1811672126323914,
"calibration/mean_confidence": 0.3110680805757154,
"calibration/prompt_uniqueness": 0.8780275469888137,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 799.4,
"completions/mean_length": 121.43369140625,
"completions/mean_terminated_length": 119.91219024658203,
"completions/min_length": 40.6,
"completions/min_terminated_length": 40.6,
"epoch": 0.128,
"grad_norm": 0.0024108977522701025,
"learning_rate": 1e-06,
"loss": 0.0019,
"num_tokens": 133923989.0,
"reward": 0.7399052381515503,
"reward_std": 0.12542397379875184,
"rewards/accuracy_reward": 0.3775390625,
"rewards/brier_reward": 0.7327269196510315,
"rewards/format_reward": 0.996875,
"rewards/frontier_aurc_reward": -0.004320676997303962,
"rewards/frontier_coverage_1": 0.1661964625120163,
"rewards/frontier_coverage_10": 0.1661964625120163,
"rewards/frontier_coverage_15": 0.1661964625120163,
"rewards/frontier_coverage_20": 0.1661964625120163,
"rewards/frontier_coverage_25": 0.1661964625120163,
"rewards/frontier_coverage_5": 0.1661964625120163,
"rewards/frontier_ece_reward": 0.00360437398776412,
"rewards/frontier_entropy_batch_reward": -0.4569231688976288,
"signal/accuracy_reward/centered_abs_mean": 0.1685546875,
"signal/accuracy_reward/group_bin_occupancy": 0.1984375,
"signal/accuracy_reward/group_std_mean": 0.21551733016967772,
"signal/accuracy_reward/group_zero_std_frac": 0.4125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08427734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08427734375,
"signal/advantage_abs_mean": 0.09791394621133805,
"signal/advantage_pre_scale_abs_mean": 0.09791394621133805,
"signal/advantage_pre_scale_std": 0.13817036151885986,
"signal/advantage_std": 0.13817036151885986,
"signal/brier_reward/centered_abs_mean": 0.1911756455898285,
"signal/brier_reward/group_bin_occupancy": 0.863671875,
"signal/brier_reward/group_std_mean": 0.24225885570049285,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023896955698728562,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023896955698728562,
"signal/format_reward/centered_abs_mean": 0.0059814453125,
"signal/format_reward/group_bin_occupancy": 0.135546875,
"signal/format_reward/group_std_mean": 0.015936914831399918,
"signal/format_reward/group_zero_std_frac": 0.915625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00299072265625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00299072265625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012180484831333161,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71640625,
"signal/frontier_aurc_reward/group_std_mean": 0.001990600279532373,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.180306655645836e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.180306655645836e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.3146185576915741,
"signal/frontier_coverage_1/group_bin_occupancy": 0.925,
"signal/frontier_coverage_1/group_std_mean": 0.3889928042888641,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_10/centered_abs_mean": 0.3146185576915741,
"signal/frontier_coverage_10/group_bin_occupancy": 0.925,
"signal/frontier_coverage_10/group_std_mean": 0.3889928042888641,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_15/centered_abs_mean": 0.3146185576915741,
"signal/frontier_coverage_15/group_bin_occupancy": 0.925,
"signal/frontier_coverage_15/group_std_mean": 0.3889928042888641,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_20/centered_abs_mean": 0.3146185576915741,
"signal/frontier_coverage_20/group_bin_occupancy": 0.925,
"signal/frontier_coverage_20/group_std_mean": 0.3889928042888641,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_25/centered_abs_mean": 0.3146185576915741,
"signal/frontier_coverage_25/group_bin_occupancy": 0.925,
"signal/frontier_coverage_25/group_std_mean": 0.3889928042888641,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_5/centered_abs_mean": 0.3146185576915741,
"signal/frontier_coverage_5/group_bin_occupancy": 0.925,
"signal/frontier_coverage_5/group_std_mean": 0.3889928042888641,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005631672125309706,
"signal/frontier_ece_reward/centered_abs_mean": 0.0448478564620018,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6765625,
"signal/frontier_ece_reward/group_std_mean": 0.06723327487707138,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005605982057750225,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005605982057750225,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44531151056289675,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5023255228996277,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.055663938820362094,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.055663938820362094,
"step": 40
},
{
"calibration/aurc": 0.3908736488625225,
"calibration/batch_distribution_entropy": 0.952870268039504,
"calibration/batch_entropy_100bins": 0.9511110787504482,
"calibration/batch_entropy_10bins": 0.952870268039504,
"calibration/batch_entropy_50bins": 0.9596622843607211,
"calibration/batch_uniqueness": 0.9485456533751936,
"calibration/buffer_distribution_entropy": 0.9173490790989061,
"calibration/buffer_entropy_100bins": 0.7607477388436942,
"calibration/buffer_entropy_10bins": 0.9173490790989061,
"calibration/buffer_entropy_50bins": 0.8259944052519161,
"calibration/confidence_entropy": 0.5360820790331252,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.025145045432220035,
"calibration/coverage@15%": 0.07660240667976424,
"calibration/coverage@20%": 0.12021733791748526,
"calibration/coverage@25%": 0.19523871778573904,
"calibration/coverage@30%": 0.23445159313725492,
"calibration/coverage@5%": 0.000390625,
"calibration/ece": 0.21712174109019186,
"calibration/mean_confidence": 0.4122595098549266,
"calibration/prompt_uniqueness": 0.8860017234651405,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 854.2,
"completions/mean_length": 137.2712890625,
"completions/mean_terminated_length": 135.76856079101563,
"completions/min_length": 45.2,
"completions/min_terminated_length": 45.2,
"epoch": 0.144,
"grad_norm": 0.0018301416421309114,
"learning_rate": 1e-06,
"loss": 0.0035,
"num_tokens": 150280079.0,
"reward": 0.8004841685295105,
"reward_std": 0.1398836553096771,
"rewards/accuracy_reward": 0.48779296875,
"rewards/brier_reward": 0.7173955202102661,
"rewards/format_reward": 0.99775390625,
"rewards/frontier_aurc_reward": -0.003834694530814886,
"rewards/frontier_coverage_1": 0.0504607018083334,
"rewards/frontier_coverage_10": 0.0504607018083334,
"rewards/frontier_coverage_15": 0.0504607018083334,
"rewards/frontier_coverage_20": 0.0504607018083334,
"rewards/frontier_coverage_25": 0.0504607018083334,
"rewards/frontier_coverage_5": 0.0504607018083334,
"rewards/frontier_ece_reward": 0.009193889400921761,
"rewards/frontier_entropy_batch_reward": -0.30771028995513916,
"signal/accuracy_reward/centered_abs_mean": 0.172930908203125,
"signal/accuracy_reward/group_bin_occupancy": 0.20703125,
"signal/accuracy_reward/group_std_mean": 0.22917729318141938,
"signal/accuracy_reward/group_zero_std_frac": 0.34375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0864654541015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0864654541015625,
"signal/advantage_abs_mean": 0.1103538304567337,
"signal/advantage_pre_scale_abs_mean": 0.1103538304567337,
"signal/advantage_pre_scale_std": 0.14888681769371032,
"signal/advantage_std": 0.14888681769371032,
"signal/brier_reward/centered_abs_mean": 0.19215757250785828,
"signal/brier_reward/group_bin_occupancy": 0.915234375,
"signal/brier_reward/group_std_mean": 0.24102371633052827,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024019696563482285,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024019696563482285,
"signal/format_reward/centered_abs_mean": 0.004351806640625,
"signal/format_reward/group_bin_occupancy": 0.133984375,
"signal/format_reward/group_std_mean": 0.012705824431031942,
"signal/format_reward/group_zero_std_frac": 0.928125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021759033203125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016577748814597727,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.787109375,
"signal/frontier_aurc_reward/group_std_mean": 0.002490155445411801,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9674168763449417e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9674168763449417e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2725175261497498,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
"signal/frontier_coverage_1/group_std_mean": 0.3421742796897888,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_10/centered_abs_mean": 0.2725175261497498,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
"signal/frontier_coverage_10/group_std_mean": 0.3421742796897888,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_15/centered_abs_mean": 0.2725175261497498,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
"signal/frontier_coverage_15/group_std_mean": 0.3421742796897888,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_20/centered_abs_mean": 0.2725175261497498,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"signal/frontier_coverage_20/group_std_mean": 0.3421742796897888,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_25/centered_abs_mean": 0.2725175261497498,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"signal/frontier_coverage_25/group_std_mean": 0.3421742796897888,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_5/centered_abs_mean": 0.2725175261497498,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
"signal/frontier_coverage_5/group_std_mean": 0.3421742796897888,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004878063499927521,
"signal/frontier_ece_reward/centered_abs_mean": 0.05783376470208168,
"signal/frontier_ece_reward/group_bin_occupancy": 0.755078125,
"signal/frontier_ece_reward/group_std_mean": 0.07939041703939438,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00722922058776021,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00722922058776021,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37824747562408445,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.766015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44698449969291687,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.047280934453010556,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.047280934453010556,
"step": 45
},
{
"calibration/aurc": 0.44512130583689513,
"calibration/batch_distribution_entropy": 0.9777809946831368,
"calibration/batch_entropy_100bins": 0.9621848460602968,
"calibration/batch_entropy_10bins": 0.9777809946831368,
"calibration/batch_entropy_50bins": 0.9734038075969862,
"calibration/batch_uniqueness": 0.9527174185369095,
"calibration/buffer_distribution_entropy": 0.9389729288749795,
"calibration/buffer_entropy_100bins": 0.8098340055908284,
"calibration/buffer_entropy_10bins": 0.9389729288749795,
"calibration/buffer_entropy_50bins": 0.8648510922973776,
"calibration/confidence_entropy": 0.5341879274317181,
"calibration/coverage@0%": 0.0007827788649706457,
"calibration/coverage@1%": 0.0007827788649706457,
"calibration/coverage@10%": 0.0007827788649706457,
"calibration/coverage@15%": 0.0007827788649706457,
"calibration/coverage@20%": 0.0136986301369863,
"calibration/coverage@25%": 0.026996697651663404,
"calibration/coverage@30%": 0.06065236668297456,
"calibration/coverage@5%": 0.0007827788649706457,
"calibration/ece": 0.12611413936588028,
"calibration/mean_confidence": 0.5199908538561552,
"calibration/prompt_uniqueness": 0.8914967420818158,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1256.2,
"completions/max_terminated_length": 726.6,
"completions/mean_length": 151.9931640625,
"completions/mean_terminated_length": 151.31725463867187,
"completions/min_length": 54.6,
"completions/min_terminated_length": 54.6,
"epoch": 0.16,
"grad_norm": 0.002915080636739731,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 166857417.0,
"reward": 0.7953056693077087,
"reward_std": 0.1445574551820755,
"rewards/accuracy_reward": 0.444921875,
"rewards/brier_reward": 0.7283108472824097,
"rewards/format_reward": 0.99892578125,
"rewards/frontier_aurc_reward": -0.004147487320005893,
"rewards/frontier_coverage_1": 0.08064137399196625,
"rewards/frontier_coverage_10": 0.08064137399196625,
"rewards/frontier_coverage_15": 0.08064137399196625,
"rewards/frontier_coverage_20": 0.08064137399196625,
"rewards/frontier_coverage_25": 0.08064137399196625,
"rewards/frontier_coverage_5": 0.08064137399196625,
"rewards/frontier_ece_reward": 0.009373257122933864,
"rewards/frontier_entropy_batch_reward": -0.2193224996328354,
"signal/accuracy_reward/centered_abs_mean": 0.1618408203125,
"signal/accuracy_reward/group_bin_occupancy": 0.1984375,
"signal/accuracy_reward/group_std_mean": 0.20846819281578063,
"signal/accuracy_reward/group_zero_std_frac": 0.4125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08092041015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08092041015625,
"signal/advantage_abs_mean": 0.1161547839641571,
"signal/advantage_pre_scale_abs_mean": 0.1161547839641571,
"signal/advantage_pre_scale_std": 0.15687225759029388,
"signal/advantage_std": 0.15687225759029388,
"signal/brier_reward/centered_abs_mean": 0.19412323236465454,
"signal/brier_reward/group_bin_occupancy": 0.912109375,
"signal/brier_reward/group_std_mean": 0.24180429577827453,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024265404045581817,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024265404045581817,
"signal/format_reward/centered_abs_mean": 0.002081298828125,
"signal/format_reward/group_bin_occupancy": 0.129296875,
"signal/format_reward/group_std_mean": 0.006076698750257492,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002387148514389992,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.79453125,
"signal/frontier_aurc_reward/group_std_mean": 0.0034649877808988093,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2729955748654905e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2729955748654905e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22005172967910766,
"signal/frontier_coverage_1/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_1/group_std_mean": 0.28260610103607176,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_10/centered_abs_mean": 0.22005172967910766,
"signal/frontier_coverage_10/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_10/group_std_mean": 0.28260610103607176,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_15/centered_abs_mean": 0.22005172967910766,
"signal/frontier_coverage_15/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_15/group_std_mean": 0.28260610103607176,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_20/centered_abs_mean": 0.22005172967910766,
"signal/frontier_coverage_20/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_20/group_std_mean": 0.28260610103607176,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_25/centered_abs_mean": 0.22005172967910766,
"signal/frontier_coverage_25/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_25/group_std_mean": 0.28260610103607176,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_5/centered_abs_mean": 0.22005172967910766,
"signal/frontier_coverage_5/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_5/group_std_mean": 0.28260610103607176,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003938925778493285,
"signal/frontier_ece_reward/centered_abs_mean": 0.07324738055467606,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7953125,
"signal/frontier_ece_reward/group_std_mean": 0.09497761726379395,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009155922569334507,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009155922569334507,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.310670405626297,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.757421875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3890035688877106,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038833800703287125,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038833800703287125,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.64462876440125,
"eval_calibration/batch_distribution_entropy": 0.9172303138520053,
"eval_calibration/batch_entropy_100bins": 0.6999477876337099,
"eval_calibration/batch_entropy_10bins": 0.9172303138520053,
"eval_calibration/batch_entropy_50bins": 0.7876103883743395,
"eval_calibration/batch_uniqueness": 0.892578125,
"eval_calibration/buffer_distribution_entropy": 0.9464992351760915,
"eval_calibration/buffer_entropy_100bins": 0.8332314601674196,
"eval_calibration/buffer_entropy_10bins": 0.9464992351760915,
"eval_calibration/buffer_entropy_50bins": 0.882575922510453,
"eval_calibration/confidence_entropy": 0.5330085179766757,
"eval_calibration/coverage@0%": 0.0078125,
"eval_calibration/coverage@1%": 0.0078125,
"eval_calibration/coverage@10%": 0.0078125,
"eval_calibration/coverage@15%": 0.0078125,
"eval_calibration/coverage@20%": 0.0078125,
"eval_calibration/coverage@25%": 0.0078125,
"eval_calibration/coverage@30%": 0.0078125,
"eval_calibration/coverage@5%": 0.0078125,
"eval_calibration/ece": 0.3476830269533962,
"eval_calibration/mean_confidence": 0.5751843204816087,
"eval_calibration/prompt_uniqueness": 0.892578125,
"eval_completions/clipped_ratio": 0.002155172413793094,
"eval_completions/max_length": 638.5,
"eval_completions/max_terminated_length": 338.25,
"eval_completions/mean_length": 167.50525283813477,
"eval_completions/mean_terminated_length": 164.54486846923828,
"eval_completions/min_length": 69.5,
"eval_completions/min_terminated_length": 69.5,
"eval_loss": 0.0,
"eval_num_tokens": 166857417.0,
"eval_reward": 0.6432019472122192,
"eval_reward_std": 0.24113430455327034,
"eval_rewards/accuracy_reward": 0.345703125,
"eval_rewards/brier_reward": 0.6860938370227814,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.00537203811109066,
"eval_rewards/frontier_coverage_1": 0.11216729879379272,
"eval_rewards/frontier_coverage_10": 0.11216729879379272,
"eval_rewards/frontier_coverage_15": 0.11216729879379272,
"eval_rewards/frontier_coverage_20": 0.11216729879379272,
"eval_rewards/frontier_coverage_25": 0.11216729879379272,
"eval_rewards/frontier_coverage_5": 0.11216729879379272,
"eval_rewards/frontier_ece_reward": -0.01303649484179914,
"eval_rewards/frontier_entropy_batch_reward": -0.998046875,
"eval_runtime": 27.8074,
"eval_samples_per_second": 17.981,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4376220703125,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4738186076283455,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21881103515625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21881103515625,
"eval_signal/advantage_abs_mean": 0.21178173646330833,
"eval_signal/advantage_pre_scale_abs_mean": 0.21178173646330833,
"eval_signal/advantage_pre_scale_std": 0.23889374360442162,
"eval_signal/advantage_std": 0.23889374360442162,
"eval_signal/brier_reward/centered_abs_mean": 0.23696352913975716,
"eval_signal/brier_reward/group_bin_occupancy": 0.9375,
"eval_signal/brier_reward/group_std_mean": 0.28708796203136444,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029620441142469645,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.029620441142469645,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_bin_occupancy": 0.1328125,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003871684370096773,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.859375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005378421046771109,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.930314702913165e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.930314702913165e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.26474981755018234,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3581102788448334,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.26474981755018234,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.3581102788448334,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.26474981755018234,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.3581102788448334,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.26474981755018234,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.3581102788448334,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.26474981755018234,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3581102788448334,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.26474981755018234,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3581102788448334,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004739021649584174,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.08822193928062916,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8515625,
"eval_signal/frontier_ece_reward/group_std_mean": 0.12208670750260353,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011027742410078645,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011027742410078645,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1328125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0004730224609375,
"eval_steps_per_second": 0.144,
"step": 50
},
{
"calibration/aurc": 0.4433609500645401,
"calibration/batch_distribution_entropy": 0.9786514493906904,
"calibration/batch_entropy_100bins": 0.9673268063104924,
"calibration/batch_entropy_10bins": 0.9786514493906904,
"calibration/batch_entropy_50bins": 0.9765609238602323,
"calibration/batch_uniqueness": 0.9542412727001033,
"calibration/buffer_distribution_entropy": 0.9496313104637549,
"calibration/buffer_entropy_100bins": 0.8458478577532829,
"calibration/buffer_entropy_10bins": 0.9496313104637549,
"calibration/buffer_entropy_50bins": 0.8919244765075639,
"calibration/confidence_entropy": 0.5027312367502492,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.00078125,
"calibration/coverage@15%": 0.00078125,
"calibration/coverage@20%": 0.00078125,
"calibration/coverage@25%": 0.07697150735294118,
"calibration/coverage@30%": 0.10665900735294118,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.18526691978044169,
"calibration/mean_confidence": 0.5750017950436274,
"calibration/prompt_uniqueness": 0.889430057394641,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 1196.4,
"completions/max_terminated_length": 714.2,
"completions/mean_length": 172.89833984375,
"completions/mean_terminated_length": 171.70108032226562,
"completions/min_length": 69.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.176,
"grad_norm": 0.0018691306468099356,
"learning_rate": 1e-06,
"loss": 0.002,
"num_tokens": 183865016.0,
"reward": 0.8038440227508545,
"reward_std": 0.14429736733436585,
"rewards/accuracy_reward": 0.4546875,
"rewards/brier_reward": 0.7278488039970398,
"rewards/format_reward": 0.99814453125,
"rewards/frontier_aurc_reward": -0.004136397829279303,
"rewards/frontier_coverage_1": 0.08787006139755249,
"rewards/frontier_coverage_10": 0.08787006139755249,
"rewards/frontier_coverage_15": 0.08787006139755249,
"rewards/frontier_coverage_20": 0.08787006139755249,
"rewards/frontier_coverage_25": 0.08787006139755249,
"rewards/frontier_coverage_5": 0.08787006139755249,
"rewards/frontier_ece_reward": 0.013153896108269692,
"rewards/frontier_entropy_batch_reward": -0.19648434817790986,
"signal/accuracy_reward/centered_abs_mean": 0.15616455078125,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.20412348508834838,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.078082275390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.078082275390625,
"signal/advantage_abs_mean": 0.11150152832269669,
"signal/advantage_pre_scale_abs_mean": 0.11150152832269669,
"signal/advantage_pre_scale_std": 0.15599793791770936,
"signal/advantage_std": 0.15599793791770936,
"signal/brier_reward/centered_abs_mean": 0.1965700715780258,
"signal/brier_reward/group_bin_occupancy": 0.896875,
"signal/brier_reward/group_std_mean": 0.24470431506633758,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024571258947253226,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024571258947253226,
"signal/format_reward/centered_abs_mean": 0.003594970703125,
"signal/format_reward/group_bin_occupancy": 0.132421875,
"signal/format_reward/group_std_mean": 0.010496115870773792,
"signal/format_reward/group_zero_std_frac": 0.940625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017974853515625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0017974853515625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002715344587340951,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.78515625,
"signal/frontier_aurc_reward/group_std_mean": 0.003948622290045023,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8604665062157436e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8604665062157436e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2161078006029129,
"signal/frontier_coverage_1/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_1/group_std_mean": 0.2793154060840607,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_10/centered_abs_mean": 0.2161078006029129,
"signal/frontier_coverage_10/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_10/group_std_mean": 0.2793154060840607,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_15/centered_abs_mean": 0.2161078006029129,
"signal/frontier_coverage_15/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_15/group_std_mean": 0.2793154060840607,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_20/centered_abs_mean": 0.2161078006029129,
"signal/frontier_coverage_20/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_20/group_std_mean": 0.2793154060840607,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_25/centered_abs_mean": 0.2161078006029129,
"signal/frontier_coverage_25/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_25/group_std_mean": 0.2793154060840607,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_5/centered_abs_mean": 0.2161078006029129,
"signal/frontier_coverage_5/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_5/group_std_mean": 0.2793154060840607,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038683294784277677,
"signal/frontier_ece_reward/centered_abs_mean": 0.07303946614265441,
"signal/frontier_ece_reward/group_bin_occupancy": 0.757421875,
"signal/frontier_ece_reward/group_std_mean": 0.0940830409526825,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009129933267831802,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009129933267831802,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2854499340057373,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3648853302001953,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035681241750717164,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035681241750717164,
"step": 55
},
{
"calibration/aurc": 0.3634893904079156,
"calibration/batch_distribution_entropy": 0.979375848885876,
"calibration/batch_entropy_100bins": 0.9688071121704119,
"calibration/batch_entropy_10bins": 0.979375848885876,
"calibration/batch_entropy_50bins": 0.9775960671362709,
"calibration/batch_uniqueness": 0.9546913035214375,
"calibration/buffer_distribution_entropy": 0.9549641064676517,
"calibration/buffer_entropy_100bins": 0.870291774493005,
"calibration/buffer_entropy_10bins": 0.9549641064676517,
"calibration/buffer_entropy_50bins": 0.9096286521366295,
"calibration/confidence_entropy": 0.47387102535633635,
"calibration/coverage@0%": 0.003125,
"calibration/coverage@1%": 0.003125,
"calibration/coverage@10%": 0.005078125,
"calibration/coverage@15%": 0.008203125,
"calibration/coverage@20%": 0.032421875,
"calibration/coverage@25%": 0.06640930772994129,
"calibration/coverage@30%": 0.33067056017612523,
"calibration/coverage@5%": 0.003125,
"calibration/ece": 0.12527432286914647,
"calibration/mean_confidence": 0.5426209280769687,
"calibration/prompt_uniqueness": 0.8840015994894641,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 1356.0,
"completions/max_terminated_length": 532.8,
"completions/mean_length": 186.98125,
"completions/mean_terminated_length": 185.92561950683594,
"completions/min_length": 73.6,
"completions/min_terminated_length": 73.6,
"epoch": 0.192,
"grad_norm": 0.0012723851250484586,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 200594520.0,
"reward": 0.8214977860450745,
"reward_std": 0.13184674382209777,
"rewards/accuracy_reward": 0.47822265625,
"rewards/brier_reward": 0.7468044757843018,
"rewards/format_reward": 0.998828125,
"rewards/frontier_aurc_reward": -0.00359484669752419,
"rewards/frontier_coverage_1": 0.1034425899386406,
"rewards/frontier_coverage_10": 0.1034425899386406,
"rewards/frontier_coverage_15": 0.1034425899386406,
"rewards/frontier_coverage_20": 0.1034425899386406,
"rewards/frontier_coverage_25": 0.1034425899386406,
"rewards/frontier_coverage_5": 0.1034425899386406,
"rewards/frontier_ece_reward": 0.022514346055686474,
"rewards/frontier_entropy_batch_reward": -0.193902850151062,
"signal/accuracy_reward/centered_abs_mean": 0.144403076171875,
"signal/accuracy_reward/group_bin_occupancy": 0.19140625,
"signal/accuracy_reward/group_std_mean": 0.1877404749393463,
"signal/accuracy_reward/group_zero_std_frac": 0.46875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0722015380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0722015380859375,
"signal/advantage_abs_mean": 0.10249822586774826,
"signal/advantage_pre_scale_abs_mean": 0.10249822586774826,
"signal/advantage_pre_scale_std": 0.14616102278232573,
"signal/advantage_std": 0.14616102278232573,
"signal/brier_reward/centered_abs_mean": 0.1951207399368286,
"signal/brier_reward/group_bin_occupancy": 0.887109375,
"signal/brier_reward/group_std_mean": 0.24402922093868257,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024390092492103575,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024390092492103575,
"signal/format_reward/centered_abs_mean": 0.00225830078125,
"signal/format_reward/group_bin_occupancy": 0.129296875,
"signal/format_reward/group_std_mean": 0.0062928175088018175,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001129150390625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002580239251255989,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.78125,
"signal/frontier_aurc_reward/group_std_mean": 0.003768660081550479,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.618627863237634e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.618627863237634e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22825982868671418,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_1/group_std_mean": 0.2953463554382324,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_10/centered_abs_mean": 0.22825982868671418,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_10/group_std_mean": 0.2953463554382324,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_15/centered_abs_mean": 0.22825982868671418,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_15/group_std_mean": 0.2953463554382324,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_20/centered_abs_mean": 0.22825982868671418,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_20/group_std_mean": 0.2953463554382324,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_25/centered_abs_mean": 0.22825982868671418,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_25/group_std_mean": 0.2953463554382324,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_5/centered_abs_mean": 0.22825982868671418,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_5/group_std_mean": 0.2953463554382324,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004085850715637207,
"signal/frontier_ece_reward/centered_abs_mean": 0.06816109567880631,
"signal/frontier_ece_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_ece_reward/group_std_mean": 0.08766969740390777,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008520136959850788,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008520136959850788,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28305876851081846,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3631249308586121,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03538234606385231,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03538234606385231,
"step": 60
},
{
"calibration/aurc": 0.2962042663019113,
"calibration/batch_distribution_entropy": 0.9784319986138323,
"calibration/batch_entropy_100bins": 0.966199879916589,
"calibration/batch_entropy_10bins": 0.9784319986138323,
"calibration/batch_entropy_50bins": 0.9744431551149368,
"calibration/batch_uniqueness": 0.9526550054551084,
"calibration/buffer_distribution_entropy": 0.9601725002568273,
"calibration/buffer_entropy_100bins": 0.888484851018059,
"calibration/buffer_entropy_10bins": 0.9601725002568273,
"calibration/buffer_entropy_50bins": 0.9226145962963195,
"calibration/confidence_entropy": 0.47267274675605747,
"calibration/coverage@0%": 0.010947437622309198,
"calibration/coverage@1%": 0.010947437622309198,
"calibration/coverage@10%": 0.12052042563600782,
"calibration/coverage@15%": 0.2706022199119374,
"calibration/coverage@20%": 0.3628806873776908,
"calibration/coverage@25%": 0.4660821306262231,
"calibration/coverage@30%": 0.552443126223092,
"calibration/coverage@5%": 0.026603014921722113,
"calibration/ece": 0.17455930370135442,
"calibration/mean_confidence": 0.48808305500723426,
"calibration/prompt_uniqueness": 0.8768258975026015,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1138.4,
"completions/max_terminated_length": 574.2,
"completions/mean_length": 199.851171875,
"completions/mean_terminated_length": 199.32947998046876,
"completions/min_length": 84.2,
"completions/min_terminated_length": 84.2,
"epoch": 0.208,
"grad_norm": 0.0013960555661469698,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 217673220.0,
"reward": 0.8450250387191772,
"reward_std": 0.12305467575788498,
"rewards/accuracy_reward": 0.524609375,
"rewards/brier_reward": 0.7498408198356629,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003026116266846657,
"rewards/frontier_coverage_1": 0.07876687720417977,
"rewards/frontier_coverage_10": 0.07876687720417977,
"rewards/frontier_coverage_15": 0.07876687720417977,
"rewards/frontier_coverage_20": 0.07876687720417977,
"rewards/frontier_coverage_25": 0.07876687720417977,
"rewards/frontier_coverage_5": 0.07876687720417977,
"rewards/frontier_ece_reward": 0.025368864834308624,
"rewards/frontier_entropy_batch_reward": -0.17717448472976685,
"signal/accuracy_reward/centered_abs_mean": 0.13885498046875,
"signal/accuracy_reward/group_bin_occupancy": 0.1890625,
"signal/accuracy_reward/group_std_mean": 0.18212647438049318,
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069427490234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.069427490234375,
"signal/advantage_abs_mean": 0.09637551605701447,
"signal/advantage_pre_scale_abs_mean": 0.09637551605701447,
"signal/advantage_pre_scale_std": 0.1368851602077484,
"signal/advantage_std": 0.1368851602077484,
"signal/brier_reward/centered_abs_mean": 0.19150430560112,
"signal/brier_reward/group_bin_occupancy": 0.884765625,
"signal/brier_reward/group_std_mean": 0.23957839012145996,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02393803820014,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02393803820014,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_bin_occupancy": 0.128515625,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021365312393754722,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7796875,
"signal/frontier_aurc_reward/group_std_mean": 0.003165799472481012,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.824390878435224e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.824390878435224e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2470179468393326,
"signal/frontier_coverage_1/group_bin_occupancy": 0.89609375,
"signal/frontier_coverage_1/group_std_mean": 0.31373026967048645,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_10/centered_abs_mean": 0.2470179468393326,
"signal/frontier_coverage_10/group_bin_occupancy": 0.89609375,
"signal/frontier_coverage_10/group_std_mean": 0.31373026967048645,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_15/centered_abs_mean": 0.2470179468393326,
"signal/frontier_coverage_15/group_bin_occupancy": 0.89609375,
"signal/frontier_coverage_15/group_std_mean": 0.31373026967048645,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_20/centered_abs_mean": 0.2470179468393326,
"signal/frontier_coverage_20/group_bin_occupancy": 0.89609375,
"signal/frontier_coverage_20/group_std_mean": 0.31373026967048645,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_25/centered_abs_mean": 0.2470179468393326,
"signal/frontier_coverage_25/group_bin_occupancy": 0.89609375,
"signal/frontier_coverage_25/group_std_mean": 0.31373026967048645,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_5/centered_abs_mean": 0.2470179468393326,
"signal/frontier_coverage_5/group_bin_occupancy": 0.89609375,
"signal/frontier_coverage_5/group_std_mean": 0.31373026967048645,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004421621095389128,
"signal/frontier_ece_reward/centered_abs_mean": 0.057305699586868285,
"signal/frontier_ece_reward/group_bin_occupancy": 0.706640625,
"signal/frontier_ece_reward/group_std_mean": 0.0751513734459877,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007163212448358536,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007163212448358536,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2632014513015747,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3395733177661896,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032900181412696836,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032900181412696836,
"step": 65
},
{
"calibration/aurc": 0.34124345710586457,
"calibration/batch_distribution_entropy": 0.9802328569497316,
"calibration/batch_entropy_100bins": 0.9677623361060274,
"calibration/batch_entropy_10bins": 0.9802328569497316,
"calibration/batch_entropy_50bins": 0.9763784251673779,
"calibration/batch_uniqueness": 0.9531273263345412,
"calibration/buffer_distribution_entropy": 0.9665878977970547,
"calibration/buffer_entropy_100bins": 0.903459766372482,
"calibration/buffer_entropy_10bins": 0.9665878977970547,
"calibration/buffer_entropy_50bins": 0.9337556437523503,
"calibration/confidence_entropy": 0.46980353579934553,
"calibration/coverage@0%": 0.0015663251602010667,
"calibration/coverage@1%": 0.0015663251602010667,
"calibration/coverage@10%": 0.06016007516020107,
"calibration/coverage@15%": 0.13458331234891216,
"calibration/coverage@20%": 0.25736270193008715,
"calibration/coverage@25%": 0.37164490881969225,
"calibration/coverage@30%": 0.48739008959748287,
"calibration/coverage@5%": 0.0015663251602010667,
"calibration/ece": 0.17054890157844843,
"calibration/mean_confidence": 0.449686680982038,
"calibration/prompt_uniqueness": 0.8756768868691467,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 729.8,
"completions/mean_length": 209.3384765625,
"completions/mean_terminated_length": 208.0431701660156,
"completions/min_length": 86.8,
"completions/min_terminated_length": 86.8,
"epoch": 0.224,
"grad_norm": 0.0011869962327182293,
"learning_rate": 1e-06,
"loss": 0.0021,
"num_tokens": 234970030.0,
"reward": 0.8263303160667419,
"reward_std": 0.11979700475931168,
"rewards/accuracy_reward": 0.4796875,
"rewards/brier_reward": 0.7604617238044739,
"rewards/format_reward": 0.99853515625,
"rewards/frontier_aurc_reward": -0.0031618389301002027,
"rewards/frontier_coverage_1": 0.1220865547657013,
"rewards/frontier_coverage_10": 0.1220865547657013,
"rewards/frontier_coverage_15": 0.1220865547657013,
"rewards/frontier_coverage_20": 0.1220865547657013,
"rewards/frontier_coverage_25": 0.1220865547657013,
"rewards/frontier_coverage_5": 0.1220865547657013,
"rewards/frontier_ece_reward": 0.022188258543610572,
"rewards/frontier_entropy_batch_reward": -0.1893421322107315,
"signal/accuracy_reward/centered_abs_mean": 0.1328857421875,
"signal/accuracy_reward/group_bin_occupancy": 0.187890625,
"signal/accuracy_reward/group_std_mean": 0.17681021988391876,
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06644287109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06644287109375,
"signal/advantage_abs_mean": 0.09216942489147187,
"signal/advantage_pre_scale_abs_mean": 0.09216942489147187,
"signal/advantage_pre_scale_std": 0.13417203724384308,
"signal/advantage_std": 0.13417203724384308,
"signal/brier_reward/centered_abs_mean": 0.18361150324344636,
"signal/brier_reward/group_bin_occupancy": 0.86953125,
"signal/brier_reward/group_std_mean": 0.23139148950576782,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022951437905430794,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022951437905430794,
"signal/format_reward/centered_abs_mean": 0.002838134765625,
"signal/format_reward/group_bin_occupancy": 0.130859375,
"signal/format_reward/group_std_mean": 0.008286407357081771,
"signal/format_reward/group_zero_std_frac": 0.953125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021024826914072038,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.768359375,
"signal/frontier_aurc_reward/group_std_mean": 0.003143219882622361,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.763444037758745e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.763444037758745e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2386508047580719,
"signal/frontier_coverage_1/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_1/group_std_mean": 0.3026322960853577,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_10/centered_abs_mean": 0.2386508047580719,
"signal/frontier_coverage_10/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_10/group_std_mean": 0.3026322960853577,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_15/centered_abs_mean": 0.2386508047580719,
"signal/frontier_coverage_15/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_15/group_std_mean": 0.3026322960853577,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_20/centered_abs_mean": 0.2386508047580719,
"signal/frontier_coverage_20/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_20/group_std_mean": 0.3026322960853577,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_25/centered_abs_mean": 0.2386508047580719,
"signal/frontier_coverage_25/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_25/group_std_mean": 0.3026322960853577,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_5/centered_abs_mean": 0.2386508047580719,
"signal/frontier_coverage_5/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_5/group_std_mean": 0.3026322960853577,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00427184933796525,
"signal/frontier_ece_reward/centered_abs_mean": 0.05160396620631218,
"signal/frontier_ece_reward/group_bin_occupancy": 0.694140625,
"signal/frontier_ece_reward/group_std_mean": 0.0683397501707077,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006450495775789022,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006450495775789022,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2719322979450226,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3513746976852417,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033991537243127826,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033991537243127826,
"step": 70
},
{
"calibration/aurc": 0.3860749528714358,
"calibration/batch_distribution_entropy": 0.9809446834041381,
"calibration/batch_entropy_100bins": 0.9698001713708365,
"calibration/batch_entropy_10bins": 0.9809446834041381,
"calibration/batch_entropy_50bins": 0.9782092600318476,
"calibration/batch_uniqueness": 0.9534891725132038,
"calibration/buffer_distribution_entropy": 0.9715373687156422,
"calibration/buffer_entropy_100bins": 0.9155967161839724,
"calibration/buffer_entropy_10bins": 0.9715373687156422,
"calibration/buffer_entropy_50bins": 0.942549278644391,
"calibration/confidence_entropy": 0.49962341310661385,
"calibration/coverage@0%": 0.0023452788649706456,
"calibration/coverage@1%": 0.0023452788649706456,
"calibration/coverage@10%": 0.08906402886497064,
"calibration/coverage@15%": 0.15312652886497063,
"calibration/coverage@20%": 0.20859527886497062,
"calibration/coverage@25%": 0.22343902886497063,
"calibration/coverage@30%": 0.32201412671232876,
"calibration/coverage@5%": 0.010157778864970646,
"calibration/ece": 0.17762837176592952,
"calibration/mean_confidence": 0.4960856168122013,
"calibration/prompt_uniqueness": 0.8812828738781218,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 1358.0,
"completions/max_terminated_length": 703.6,
"completions/mean_length": 217.19853515625,
"completions/mean_terminated_length": 216.16774291992186,
"completions/min_length": 96.8,
"completions/min_terminated_length": 96.8,
"epoch": 0.24,
"grad_norm": 0.0012256160844117403,
"learning_rate": 1e-06,
"loss": 0.0022,
"num_tokens": 252445823.0,
"reward": 0.8490519523620605,
"reward_std": 0.1284138709306717,
"rewards/accuracy_reward": 0.5345703125,
"rewards/brier_reward": 0.7593809008598328,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.0028817789163440464,
"rewards/frontier_coverage_1": 0.07307546683587134,
"rewards/frontier_coverage_10": 0.07307546683587134,
"rewards/frontier_coverage_15": 0.07307546683587134,
"rewards/frontier_coverage_20": 0.07307546683587134,
"rewards/frontier_coverage_25": 0.07307546683587134,
"rewards/frontier_coverage_5": 0.07307546683587134,
"rewards/frontier_ece_reward": 0.023964449018239974,
"rewards/frontier_entropy_batch_reward": -0.18767853379249572,
"signal/accuracy_reward/centered_abs_mean": 0.14945068359375,
"signal/accuracy_reward/group_bin_occupancy": 0.1953125,
"signal/accuracy_reward/group_std_mean": 0.19708451330661775,
"signal/accuracy_reward/group_zero_std_frac": 0.4375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074725341796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074725341796875,
"signal/advantage_abs_mean": 0.10107299536466599,
"signal/advantage_pre_scale_abs_mean": 0.10107299536466599,
"signal/advantage_pre_scale_std": 0.14294905364513397,
"signal/advantage_std": 0.14294905364513397,
"signal/brier_reward/centered_abs_mean": 0.1782033383846283,
"signal/brier_reward/group_bin_occupancy": 0.876171875,
"signal/brier_reward/group_std_mean": 0.22389057874679566,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022275417298078536,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022275417298078536,
"signal/format_reward/centered_abs_mean": 0.0018798828125,
"signal/format_reward/group_bin_occupancy": 0.128515625,
"signal/format_reward/group_std_mean": 0.0051879632286727425,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002186433505266905,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.79296875,
"signal/frontier_aurc_reward/group_std_mean": 0.00316581423394382,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9137157000368464e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9137157000368464e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22060461044311525,
"signal/frontier_coverage_1/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_1/group_std_mean": 0.2838852107524872,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_10/centered_abs_mean": 0.22060461044311525,
"signal/frontier_coverage_10/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_10/group_std_mean": 0.2838852107524872,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_15/centered_abs_mean": 0.22060461044311525,
"signal/frontier_coverage_15/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_15/group_std_mean": 0.2838852107524872,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_20/centered_abs_mean": 0.22060461044311525,
"signal/frontier_coverage_20/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_20/group_std_mean": 0.2838852107524872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_25/centered_abs_mean": 0.22060461044311525,
"signal/frontier_coverage_25/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_25/group_std_mean": 0.2838852107524872,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_5/centered_abs_mean": 0.22060461044311525,
"signal/frontier_coverage_5/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_5/group_std_mean": 0.2838852107524872,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003948822524398566,
"signal/frontier_ece_reward/centered_abs_mean": 0.050569846481084826,
"signal/frontier_ece_reward/group_bin_occupancy": 0.68359375,
"signal/frontier_ece_reward/group_std_mean": 0.06721205115318299,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006321230810135603,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006321230810135603,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2772687911987305,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3573911190032959,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03465859889984131,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03465859889984131,
"step": 75
},
{
"calibration/aurc": 0.3027865730158209,
"calibration/batch_distribution_entropy": 0.9824120975656007,
"calibration/batch_entropy_100bins": 0.9675954439907024,
"calibration/batch_entropy_10bins": 0.9824120975656007,
"calibration/batch_entropy_50bins": 0.9768153214786937,
"calibration/batch_uniqueness": 0.9534858712115677,
"calibration/buffer_distribution_entropy": 0.9745029867503969,
"calibration/buffer_entropy_100bins": 0.9254029563560598,
"calibration/buffer_entropy_10bins": 0.9745029867503969,
"calibration/buffer_entropy_50bins": 0.9494087491638338,
"calibration/confidence_entropy": 0.47803596521726666,
"calibration/coverage@0%": 0.015629586594911937,
"calibration/coverage@1%": 0.015629586594911937,
"calibration/coverage@10%": 0.14150486179060665,
"calibration/coverage@15%": 0.28024859344422703,
"calibration/coverage@20%": 0.3670330846379647,
"calibration/coverage@25%": 0.4843245474559687,
"calibration/coverage@30%": 0.5863885151663405,
"calibration/coverage@5%": 0.03712695694716243,
"calibration/ece": 0.1304594987473791,
"calibration/mean_confidence": 0.5039093234803077,
"calibration/prompt_uniqueness": 0.8684847225383715,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 1332.2,
"completions/max_terminated_length": 770.2,
"completions/mean_length": 211.239453125,
"completions/mean_terminated_length": 210.33331604003905,
"completions/min_length": 97.4,
"completions/min_terminated_length": 97.4,
"epoch": 0.256,
"grad_norm": 0.001191093702800572,
"learning_rate": 1e-06,
"loss": 0.0018,
"num_tokens": 269663731.0,
"reward": 0.8402328372001648,
"reward_std": 0.11854993999004364,
"rewards/accuracy_reward": 0.50908203125,
"rewards/brier_reward": 0.7678213238716125,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.002967528020963073,
"rewards/frontier_coverage_1": 0.10275040753185749,
"rewards/frontier_coverage_10": 0.10275040753185749,
"rewards/frontier_coverage_15": 0.10275040753185749,
"rewards/frontier_coverage_20": 0.10275040753185749,
"rewards/frontier_coverage_25": 0.10275040753185749,
"rewards/frontier_coverage_5": 0.10275040753185749,
"rewards/frontier_ece_reward": 0.024521516263484956,
"rewards/frontier_entropy_batch_reward": -0.19076020121574402,
"signal/accuracy_reward/centered_abs_mean": 0.136175537109375,
"signal/accuracy_reward/group_bin_occupancy": 0.18671875,
"signal/accuracy_reward/group_std_mean": 0.17651076018810272,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0680877685546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0680877685546875,
"signal/advantage_abs_mean": 0.0925136923789978,
"signal/advantage_pre_scale_abs_mean": 0.0925136923789978,
"signal/advantage_pre_scale_std": 0.13497910499572754,
"signal/advantage_std": 0.13497910499572754,
"signal/brier_reward/centered_abs_mean": 0.16773454546928407,
"signal/brier_reward/group_bin_occupancy": 0.859375,
"signal/brier_reward/group_std_mean": 0.21297150552272798,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02096681818366051,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02096681818366051,
"signal/format_reward/centered_abs_mean": 0.00189208984375,
"signal/format_reward/group_bin_occupancy": 0.12890625,
"signal/format_reward/group_std_mean": 0.005524271540343762,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002278644498437643,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.781640625,
"signal/frontier_aurc_reward/group_std_mean": 0.0033373693004250526,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.078773708897643e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.078773708897643e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2098309278488159,
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_1/group_std_mean": 0.26889588236808776,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_10/centered_abs_mean": 0.2098309278488159,
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_10/group_std_mean": 0.26889588236808776,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_15/centered_abs_mean": 0.2098309278488159,
"signal/frontier_coverage_15/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_15/group_std_mean": 0.26889588236808776,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_20/centered_abs_mean": 0.2098309278488159,
"signal/frontier_coverage_20/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_20/group_std_mean": 0.26889588236808776,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_25/centered_abs_mean": 0.2098309278488159,
"signal/frontier_coverage_25/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_25/group_std_mean": 0.26889588236808776,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_5/centered_abs_mean": 0.2098309278488159,
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_5/group_std_mean": 0.26889588236808776,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037559733726084234,
"signal/frontier_ece_reward/centered_abs_mean": 0.04780852794647217,
"signal/frontier_ece_reward/group_bin_occupancy": 0.66171875,
"signal/frontier_ece_reward/group_std_mean": 0.06307629272341728,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005976065993309021,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005976065993309021,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.271970134973526,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755859375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3501406848430634,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03399626687169075,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03399626687169075,
"step": 80
},
{
"calibration/aurc": 0.367258270235761,
"calibration/batch_distribution_entropy": 0.9895644463823899,
"calibration/batch_entropy_100bins": 0.971964415223642,
"calibration/batch_entropy_10bins": 0.9895644463823899,
"calibration/batch_entropy_50bins": 0.9832667329307185,
"calibration/batch_uniqueness": 0.9549774753110185,
"calibration/buffer_distribution_entropy": 0.9773499248067615,
"calibration/buffer_entropy_100bins": 0.9333172418219796,
"calibration/buffer_entropy_10bins": 0.9773499248067615,
"calibration/buffer_entropy_50bins": 0.9548425574225243,
"calibration/confidence_entropy": 0.49009995581254717,
"calibration/coverage@0%": 0.003126528864970646,
"calibration/coverage@1%": 0.003126528864970646,
"calibration/coverage@10%": 0.11253975048923678,
"calibration/coverage@15%": 0.144580938111546,
"calibration/coverage@20%": 0.20554748654598826,
"calibration/coverage@25%": 0.2856638331702544,
"calibration/coverage@30%": 0.3716640166340509,
"calibration/coverage@5%": 0.06562652886497064,
"calibration/ece": 0.14631884515051413,
"calibration/mean_confidence": 0.498405006869299,
"calibration/prompt_uniqueness": 0.8753438292631373,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1144.6,
"completions/max_terminated_length": 571.2,
"completions/mean_length": 214.196875,
"completions/mean_terminated_length": 213.6815643310547,
"completions/min_length": 91.2,
"completions/min_terminated_length": 91.2,
"epoch": 0.272,
"grad_norm": 0.0008510042098350823,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 286822803.0,
"reward": 0.8385721206665039,
"reward_std": 0.11488556414842606,
"rewards/accuracy_reward": 0.49951171875,
"rewards/brier_reward": 0.7645434498786926,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.0029727344401180742,
"rewards/frontier_coverage_1": 0.10659078769385814,
"rewards/frontier_coverage_10": 0.10659078769385814,
"rewards/frontier_coverage_15": 0.10659078769385814,
"rewards/frontier_coverage_20": 0.10659078769385814,
"rewards/frontier_coverage_25": 0.10659078769385814,
"rewards/frontier_coverage_5": 0.10659078769385814,
"rewards/frontier_ece_reward": 0.021371402591466904,
"rewards/frontier_entropy_batch_reward": -0.16341689825057984,
"signal/accuracy_reward/centered_abs_mean": 0.124920654296875,
"signal/accuracy_reward/group_bin_occupancy": 0.184375,
"signal/accuracy_reward/group_std_mean": 0.16582859456539153,
"signal/accuracy_reward/group_zero_std_frac": 0.525,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0624603271484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0624603271484375,
"signal/advantage_abs_mean": 0.08883939385414123,
"signal/advantage_pre_scale_abs_mean": 0.08883939385414123,
"signal/advantage_pre_scale_std": 0.13017976582050322,
"signal/advantage_std": 0.13017976582050322,
"signal/brier_reward/centered_abs_mean": 0.1681692123413086,
"signal/brier_reward/group_bin_occupancy": 0.858984375,
"signal/brier_reward/group_std_mean": 0.2134801924228668,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021021151542663576,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021021151542663576,
"signal/format_reward/centered_abs_mean": 0.001513671875,
"signal/format_reward/group_bin_occupancy": 0.128125,
"signal/format_reward/group_std_mean": 0.004419417306780815,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002199468924663961,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.768359375,
"signal/frontier_aurc_reward/group_std_mean": 0.0032290446572005747,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9370492595480754e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9370492595480754e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21055666208267212,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_1/group_std_mean": 0.2709640562534332,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_10/centered_abs_mean": 0.21055666208267212,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_10/group_std_mean": 0.2709640562534332,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_15/centered_abs_mean": 0.21055666208267212,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_15/group_std_mean": 0.2709640562534332,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_20/centered_abs_mean": 0.21055666208267212,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_20/group_std_mean": 0.2709640562534332,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_25/centered_abs_mean": 0.21055666208267212,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_25/group_std_mean": 0.2709640562534332,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_5/centered_abs_mean": 0.21055666208267212,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_5/group_std_mean": 0.2709640562534332,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037689640186727045,
"signal/frontier_ece_reward/centered_abs_mean": 0.044014541804790495,
"signal/frontier_ece_reward/group_bin_occupancy": 0.654296875,
"signal/frontier_ece_reward/group_std_mean": 0.058340293169021604,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005501817725598812,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005501817725598812,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2418614625930786,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3228378236293793,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030232682824134827,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030232682824134827,
"step": 85
},
{
"calibration/aurc": 0.3467141361919698,
"calibration/batch_distribution_entropy": 0.9916170507697315,
"calibration/batch_entropy_100bins": 0.9717505719323067,
"calibration/batch_entropy_10bins": 0.9916170507697315,
"calibration/batch_entropy_50bins": 0.9835024232131098,
"calibration/batch_uniqueness": 0.9547727550018493,
"calibration/buffer_distribution_entropy": 0.9800138866676088,
"calibration/buffer_entropy_100bins": 0.9401036418916597,
"calibration/buffer_entropy_10bins": 0.9800138866676088,
"calibration/buffer_entropy_50bins": 0.959772586517515,
"calibration/confidence_entropy": 0.4993253403838624,
"calibration/coverage@0%": 0.011344178082191781,
"calibration/coverage@1%": 0.011344178082191781,
"calibration/coverage@10%": 0.055174443493150684,
"calibration/coverage@15%": 0.0911792135518591,
"calibration/coverage@20%": 0.14161111790606654,
"calibration/coverage@25%": 0.17874495474559687,
"calibration/coverage@30%": 0.3023085861056751,
"calibration/coverage@5%": 0.035218933463796474,
"calibration/ece": 0.11480474764719235,
"calibration/mean_confidence": 0.5161338636836956,
"calibration/prompt_uniqueness": 0.8751561382186525,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 714.0,
"completions/mean_length": 205.28056640625,
"completions/mean_terminated_length": 204.50087890625,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.288,
"grad_norm": 0.0010703956941142678,
"learning_rate": 1e-06,
"loss": 0.0017,
"num_tokens": 303883052.0,
"reward": 0.8444351196289063,
"reward_std": 0.11515617072582245,
"rewards/accuracy_reward": 0.51435546875,
"rewards/brier_reward": 0.7651584386825562,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.002866340894252062,
"rewards/frontier_coverage_1": 0.09787596613168717,
"rewards/frontier_coverage_10": 0.09787596613168717,
"rewards/frontier_coverage_15": 0.09787596613168717,
"rewards/frontier_coverage_20": 0.09787596613168717,
"rewards/frontier_coverage_25": 0.09787596613168717,
"rewards/frontier_coverage_5": 0.09787596613168717,
"rewards/frontier_ece_reward": 0.020884520187973977,
"rewards/frontier_entropy_batch_reward": -0.1689342439174652,
"signal/accuracy_reward/centered_abs_mean": 0.134161376953125,
"signal/accuracy_reward/group_bin_occupancy": 0.186328125,
"signal/accuracy_reward/group_std_mean": 0.17561693191528321,
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0670806884765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0670806884765625,
"signal/advantage_abs_mean": 0.09000947773456573,
"signal/advantage_pre_scale_abs_mean": 0.09000947773456573,
"signal/advantage_pre_scale_std": 0.13045729398727418,
"signal/advantage_std": 0.13045729398727418,
"signal/brier_reward/centered_abs_mean": 0.16578397750854493,
"signal/brier_reward/group_bin_occupancy": 0.86015625,
"signal/brier_reward/group_std_mean": 0.20985957980155945,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020722997188568116,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020722997188568116,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.003866990143433213,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021198820788413284,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.778125,
"signal/frontier_aurc_reward/group_std_mean": 0.003104550950229168,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.794588847085834e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.794588847085834e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2139565408229828,
"signal/frontier_coverage_1/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_1/group_std_mean": 0.2742366850376129,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_10/centered_abs_mean": 0.2139565408229828,
"signal/frontier_coverage_10/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_10/group_std_mean": 0.2742366850376129,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_15/centered_abs_mean": 0.2139565408229828,
"signal/frontier_coverage_15/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_15/group_std_mean": 0.2742366850376129,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_20/centered_abs_mean": 0.2139565408229828,
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_20/group_std_mean": 0.2742366850376129,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_25/centered_abs_mean": 0.2139565408229828,
"signal/frontier_coverage_25/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_25/group_std_mean": 0.2742366850376129,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_5/centered_abs_mean": 0.2139565408229828,
"signal/frontier_coverage_5/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_5/group_std_mean": 0.2742366850376129,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038298218045383693,
"signal/frontier_ece_reward/centered_abs_mean": 0.04083863347768783,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6359375,
"signal/frontier_ece_reward/group_std_mean": 0.053824542462825774,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005104829184710979,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005104829184710979,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25178911685943606,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3293434023857117,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03147363960742951,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03147363960742951,
"step": 90
},
{
"calibration/aurc": 0.3012958766581849,
"calibration/batch_distribution_entropy": 0.9837421655310596,
"calibration/batch_entropy_100bins": 0.9707956283872885,
"calibration/batch_entropy_10bins": 0.9837421655310596,
"calibration/batch_entropy_50bins": 0.9794131696452247,
"calibration/batch_uniqueness": 0.9540138412528385,
"calibration/buffer_distribution_entropy": 0.9819037876792042,
"calibration/buffer_entropy_100bins": 0.9457814405734298,
"calibration/buffer_entropy_10bins": 0.9819037876792042,
"calibration/buffer_entropy_50bins": 0.9637111958737059,
"calibration/confidence_entropy": 0.4894476112277997,
"calibration/coverage@0%": 0.0054825367647058825,
"calibration/coverage@1%": 0.0054825367647058825,
"calibration/coverage@10%": 0.04701746323529411,
"calibration/coverage@15%": 0.1557919730392157,
"calibration/coverage@20%": 0.2575536151960784,
"calibration/coverage@25%": 0.39245251225490196,
"calibration/coverage@30%": 0.5761427696078432,
"calibration/coverage@5%": 0.010580575980392156,
"calibration/ece": 0.12106669425909085,
"calibration/mean_confidence": 0.5282264856879203,
"calibration/prompt_uniqueness": 0.8732020399305556,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1072.8,
"completions/max_terminated_length": 670.4,
"completions/mean_length": 207.81123046875,
"completions/mean_terminated_length": 207.42286987304686,
"completions/min_length": 88.6,
"completions/min_terminated_length": 88.6,
"epoch": 0.304,
"grad_norm": 0.000941499718464911,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 320940991.0,
"reward": 0.8401289582252502,
"reward_std": 0.10886461585760117,
"rewards/accuracy_reward": 0.51181640625,
"rewards/brier_reward": 0.7542804956436158,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0029725372325628994,
"rewards/frontier_coverage_1": 0.08972824737429619,
"rewards/frontier_coverage_10": 0.08972824737429619,
"rewards/frontier_coverage_15": 0.08972824737429619,
"rewards/frontier_coverage_20": 0.08972824737429619,
"rewards/frontier_coverage_25": 0.08972824737429619,
"rewards/frontier_coverage_5": 0.08972824737429619,
"rewards/frontier_ece_reward": 0.017498020455241202,
"rewards/frontier_entropy_batch_reward": -0.17311883568763733,
"signal/accuracy_reward/centered_abs_mean": 0.122198486328125,
"signal/accuracy_reward/group_bin_occupancy": 0.183203125,
"signal/accuracy_reward/group_std_mean": 0.16242018342018127,
"signal/accuracy_reward/group_zero_std_frac": 0.534375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0610992431640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0610992431640625,
"signal/advantage_abs_mean": 0.08474316000938416,
"signal/advantage_pre_scale_abs_mean": 0.08474316000938416,
"signal/advantage_pre_scale_std": 0.12225746810436249,
"signal/advantage_std": 0.12225746810436249,
"signal/brier_reward/centered_abs_mean": 0.1670261949300766,
"signal/brier_reward/group_bin_occupancy": 0.86796875,
"signal/brier_reward/group_std_mean": 0.21082913279533386,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020878274366259574,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020878274366259574,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021015216829255224,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.775390625,
"signal/frontier_aurc_reward/group_std_mean": 0.003054375061765313,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.761723637580871e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.761723637580871e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21436746120452882,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_1/group_std_mean": 0.2753202378749847,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_10/centered_abs_mean": 0.21436746120452882,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_10/group_std_mean": 0.2753202378749847,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_15/centered_abs_mean": 0.21436746120452882,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_15/group_std_mean": 0.2753202378749847,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_20/centered_abs_mean": 0.21436746120452882,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_20/group_std_mean": 0.2753202378749847,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_25/centered_abs_mean": 0.21436746120452882,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_25/group_std_mean": 0.2753202378749847,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_5/centered_abs_mean": 0.21436746120452882,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_5/group_std_mean": 0.2753202378749847,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038371775299310684,
"signal/frontier_ece_reward/centered_abs_mean": 0.03961938172578812,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6265625,
"signal/frontier_ece_reward/group_std_mean": 0.051925134658813474,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004952422715723515,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004952422715723515,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25431135296821594,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3349409639835358,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03178891912102699,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03178891912102699,
"step": 95
},
{
"calibration/aurc": 0.2567548892732471,
"calibration/batch_distribution_entropy": 0.9855417888920972,
"calibration/batch_entropy_100bins": 0.9704664005737822,
"calibration/batch_entropy_10bins": 0.9855417888920972,
"calibration/batch_entropy_50bins": 0.9802541786549019,
"calibration/batch_uniqueness": 0.9540855714765243,
"calibration/buffer_distribution_entropy": 0.983232282815165,
"calibration/buffer_entropy_100bins": 0.9505687986075818,
"calibration/buffer_entropy_10bins": 0.983232282815165,
"calibration/buffer_entropy_50bins": 0.9668949759737334,
"calibration/confidence_entropy": 0.48253534328172953,
"calibration/coverage@0%": 0.013282778864970646,
"calibration/coverage@1%": 0.013282778864970646,
"calibration/coverage@10%": 0.21737555039138945,
"calibration/coverage@15%": 0.34399844055772993,
"calibration/coverage@20%": 0.44441964285714286,
"calibration/coverage@25%": 0.5225859222113503,
"calibration/coverage@30%": 0.6793251590019569,
"calibration/coverage@5%": 0.07500152886497065,
"calibration/ece": 0.1256929928477562,
"calibration/mean_confidence": 0.5410886092142749,
"calibration/prompt_uniqueness": 0.8685091111960197,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1152.6,
"completions/max_terminated_length": 535.2,
"completions/mean_length": 207.54091796875,
"completions/mean_terminated_length": 206.8915283203125,
"completions/min_length": 94.8,
"completions/min_terminated_length": 94.8,
"epoch": 0.32,
"grad_norm": 0.0010245247976854444,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 338154914.0,
"reward": 0.8519093155860901,
"reward_std": 0.10203693956136703,
"rewards/accuracy_reward": 0.5287109375,
"rewards/brier_reward": 0.7753150343894959,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0027055200189352036,
"rewards/frontier_coverage_1": 0.09903865978121758,
"rewards/frontier_coverage_10": 0.09903865978121758,
"rewards/frontier_coverage_15": 0.09903865978121758,
"rewards/frontier_coverage_20": 0.09903865978121758,
"rewards/frontier_coverage_25": 0.09903865978121758,
"rewards/frontier_coverage_5": 0.09903865978121758,
"rewards/frontier_ece_reward": 0.022906527668237687,
"rewards/frontier_entropy_batch_reward": -0.1797630488872528,
"signal/accuracy_reward/centered_abs_mean": 0.09708251953125,
"signal/accuracy_reward/group_bin_occupancy": 0.177734375,
"signal/accuracy_reward/group_std_mean": 0.13628645241260529,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048541259765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.048541259765625,
"signal/advantage_abs_mean": 0.07726810723543168,
"signal/advantage_pre_scale_abs_mean": 0.07726810723543168,
"signal/advantage_pre_scale_std": 0.11642331779003143,
"signal/advantage_std": 0.11642331779003143,
"signal/brier_reward/centered_abs_mean": 0.15678012669086455,
"signal/brier_reward/group_bin_occupancy": 0.85234375,
"signal/brier_reward/group_std_mean": 0.1998385190963745,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01959751583635807,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01959751583635807,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0035306816920638085,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002104542893357575,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.777734375,
"signal/frontier_aurc_reward/group_std_mean": 0.0030782954767346383,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.76713156583719e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.76713156583719e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19006343185901642,
"signal/frontier_coverage_1/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_1/group_std_mean": 0.24371300339698793,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_10/centered_abs_mean": 0.19006343185901642,
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_10/group_std_mean": 0.24371300339698793,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_15/centered_abs_mean": 0.19006343185901642,
"signal/frontier_coverage_15/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_15/group_std_mean": 0.24371300339698793,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_20/centered_abs_mean": 0.19006343185901642,
"signal/frontier_coverage_20/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_20/group_std_mean": 0.24371300339698793,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_25/centered_abs_mean": 0.19006343185901642,
"signal/frontier_coverage_25/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_25/group_std_mean": 0.24371300339698793,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_5/centered_abs_mean": 0.19006343185901642,
"signal/frontier_coverage_5/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_5/group_std_mean": 0.24371300339698793,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00340213542804122,
"signal/frontier_ece_reward/centered_abs_mean": 0.04008113071322441,
"signal/frontier_ece_reward/group_bin_occupancy": 0.603515625,
"signal/frontier_ece_reward/group_std_mean": 0.05185698121786118,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005010141339153051,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005010141339153051,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2621337234973907,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33962839245796206,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03276671543717384,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03276671543717384,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.4795200196771078,
"eval_calibration/batch_distribution_entropy": 0.9021095312089411,
"eval_calibration/batch_entropy_100bins": 0.6837504483206038,
"eval_calibration/batch_entropy_10bins": 0.9021095312089411,
"eval_calibration/batch_entropy_50bins": 0.7600994124001819,
"eval_calibration/batch_uniqueness": 0.8837890625,
"eval_calibration/buffer_distribution_entropy": 0.9836716950770678,
"eval_calibration/buffer_entropy_100bins": 0.9532928632962285,
"eval_calibration/buffer_entropy_10bins": 0.9836716950770678,
"eval_calibration/buffer_entropy_50bins": 0.968683184468552,
"eval_calibration/confidence_entropy": 0.45694869462366755,
"eval_calibration/coverage@0%": 0.078125,
"eval_calibration/coverage@1%": 0.078125,
"eval_calibration/coverage@10%": 0.078125,
"eval_calibration/coverage@15%": 0.078125,
"eval_calibration/coverage@20%": 0.1015625,
"eval_calibration/coverage@25%": 0.1015625,
"eval_calibration/coverage@30%": 0.21875,
"eval_calibration/coverage@5%": 0.078125,
"eval_calibration/ece": 0.2528412410504649,
"eval_calibration/mean_confidence": 0.4613864587309076,
"eval_calibration/prompt_uniqueness": 0.8837890625,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 422.0,
"eval_completions/max_terminated_length": 422.0,
"eval_completions/mean_length": 207.25538635253906,
"eval_completions/mean_terminated_length": 207.25538635253906,
"eval_completions/min_length": 107.25,
"eval_completions/min_terminated_length": 107.25,
"eval_loss": 0.0,
"eval_num_tokens": 338154914.0,
"eval_reward": 0.7035073935985565,
"eval_reward_std": 0.2254948876798153,
"eval_rewards/accuracy_reward": 0.41796875,
"eval_rewards/brier_reward": 0.7835361808538437,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.003143564856145531,
"eval_rewards/frontier_coverage_1": 0.18091510236263275,
"eval_rewards/frontier_coverage_10": 0.18091510236263275,
"eval_rewards/frontier_coverage_15": 0.18091510236263275,
"eval_rewards/frontier_coverage_20": 0.18091510236263275,
"eval_rewards/frontier_coverage_25": 0.18091510236263275,
"eval_rewards/frontier_coverage_5": 0.18091510236263275,
"eval_rewards/frontier_ece_reward": 0.017655907664448023,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 21.7452,
"eval_samples_per_second": 22.994,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4716796875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49268144369125366,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23583984375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23583984375,
"eval_signal/advantage_abs_mean": 0.2067052647471428,
"eval_signal/advantage_pre_scale_abs_mean": 0.2067052647471428,
"eval_signal/advantage_pre_scale_std": 0.22346897423267365,
"eval_signal/advantage_std": 0.22346897423267365,
"eval_signal/brier_reward/centered_abs_mean": 0.19929831847548485,
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
"eval_signal/brier_reward/group_std_mean": 0.24902449920773506,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024912289809435606,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.024912289809435606,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030411332263611257,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7734375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004569516517221928,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.443628197099315e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.443628197099315e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.35146621614694595,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.43268968909978867,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35146621614694595,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.43268968909978867,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.35146621614694595,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.43268968909978867,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.35146621614694595,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.43268968909978867,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.35146621614694595,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.43268968909978867,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.35146621614694595,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.43268968909978867,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006291245226748288,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04135385248810053,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7421875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.062202571891248226,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005169231561012566,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005169231561012566,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.184,
"step": 100
},
{
"calibration/aurc": 0.30765736387397496,
"calibration/batch_distribution_entropy": 0.9848037323472848,
"calibration/batch_entropy_100bins": 0.9693385191204799,
"calibration/batch_entropy_10bins": 0.9848037323472848,
"calibration/batch_entropy_50bins": 0.9781402406586969,
"calibration/batch_uniqueness": 0.9536346435546875,
"calibration/buffer_distribution_entropy": 0.9854686951779537,
"calibration/buffer_entropy_100bins": 0.9569212673153616,
"calibration/buffer_entropy_10bins": 0.9854686951779537,
"calibration/buffer_entropy_50bins": 0.9713440007079225,
"calibration/confidence_entropy": 0.4995682667114387,
"calibration/coverage@0%": 0.019140625,
"calibration/coverage@1%": 0.019140625,
"calibration/coverage@10%": 0.040625,
"calibration/coverage@15%": 0.075,
"calibration/coverage@20%": 0.15234375,
"calibration/coverage@25%": 0.30859375,
"calibration/coverage@30%": 0.529296875,
"calibration/coverage@5%": 0.019921875,
"calibration/ece": 0.12052893017349497,
"calibration/mean_confidence": 0.48753457746559115,
"calibration/prompt_uniqueness": 0.867578125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 982.4,
"completions/max_terminated_length": 562.6,
"completions/mean_length": 209.47412109375,
"completions/mean_terminated_length": 209.2148651123047,
"completions/min_length": 91.6,
"completions/min_terminated_length": 91.6,
"epoch": 0.336,
"grad_norm": 0.0010945890098810196,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 355022361.0,
"reward": 0.8489757418632508,
"reward_std": 0.10516680479049682,
"rewards/accuracy_reward": 0.52822265625,
"rewards/brier_reward": 0.7679716944694519,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0026710231322795153,
"rewards/frontier_coverage_1": 0.08716111201792956,
"rewards/frontier_coverage_10": 0.08716111201792956,
"rewards/frontier_coverage_15": 0.08716111201792956,
"rewards/frontier_coverage_20": 0.08716111201792956,
"rewards/frontier_coverage_25": 0.08716111201792956,
"rewards/frontier_coverage_5": 0.08716111201792956,
"rewards/frontier_ece_reward": 0.01877461187541485,
"rewards/frontier_entropy_batch_reward": -0.18116532862186432,
"signal/accuracy_reward/centered_abs_mean": 0.112652587890625,
"signal/accuracy_reward/group_bin_occupancy": 0.18125,
"signal/accuracy_reward/group_std_mean": 0.15273889005184174,
"signal/accuracy_reward/group_zero_std_frac": 0.55,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0563262939453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0563262939453125,
"signal/advantage_abs_mean": 0.08183809071779251,
"signal/advantage_pre_scale_abs_mean": 0.08183809071779251,
"signal/advantage_pre_scale_std": 0.12064377218484879,
"signal/advantage_std": 0.12064377218484879,
"signal/brier_reward/centered_abs_mean": 0.15700196623802185,
"signal/brier_reward/group_bin_occupancy": 0.870703125,
"signal/brier_reward/group_std_mean": 0.19847926795482634,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01962524577975273,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01962524577975273,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002023302251473069,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.76171875,
"signal/frontier_aurc_reward/group_std_mean": 0.0030010143760591745,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.621711148298346e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.621711148298346e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19928583800792693,
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_1/group_std_mean": 0.25434514582157136,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_10/centered_abs_mean": 0.19928583800792693,
"signal/frontier_coverage_10/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_10/group_std_mean": 0.25434514582157136,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_15/centered_abs_mean": 0.19928583800792693,
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_15/group_std_mean": 0.25434514582157136,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_20/centered_abs_mean": 0.19928583800792693,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.25434514582157136,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_25/centered_abs_mean": 0.19928583800792693,
"signal/frontier_coverage_25/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_25/group_std_mean": 0.25434514582157136,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_5/centered_abs_mean": 0.19928583800792693,
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_5/group_std_mean": 0.25434514582157136,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035672161728143694,
"signal/frontier_ece_reward/centered_abs_mean": 0.03437858745455742,
"signal/frontier_ece_reward/group_bin_occupancy": 0.616015625,
"signal/frontier_ece_reward/group_std_mean": 0.04594796299934387,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004297323431819678,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004297323431819678,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26069518327713015,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3357880413532257,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03258689790964127,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03258689790964127,
"step": 105
},
{
"calibration/aurc": 0.3248327389426379,
"calibration/batch_distribution_entropy": 0.9694191613572741,
"calibration/batch_entropy_100bins": 0.9653426900496431,
"calibration/batch_entropy_10bins": 0.9694191613572741,
"calibration/batch_entropy_50bins": 0.9723249920791073,
"calibration/batch_uniqueness": 0.9502899169921875,
"calibration/buffer_distribution_entropy": 0.9915301414778608,
"calibration/buffer_entropy_100bins": 0.9701647014936114,
"calibration/buffer_entropy_10bins": 0.9915301414778608,
"calibration/buffer_entropy_50bins": 0.9807902497768815,
"calibration/confidence_entropy": 0.46782397627976635,
"calibration/coverage@0%": 0.01640625,
"calibration/coverage@1%": 0.01640625,
"calibration/coverage@10%": 0.158984375,
"calibration/coverage@15%": 0.28203125,
"calibration/coverage@20%": 0.358984375,
"calibration/coverage@25%": 0.437109375,
"calibration/coverage@30%": 0.49609375,
"calibration/coverage@5%": 0.019140625,
"calibration/ece": 0.12978758350246267,
"calibration/mean_confidence": 0.43861533967578464,
"calibration/prompt_uniqueness": 0.862841796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1178.4,
"completions/max_terminated_length": 645.2,
"completions/mean_length": 213.62646484375,
"completions/mean_terminated_length": 213.23961181640624,
"completions/min_length": 89.6,
"completions/min_terminated_length": 89.6,
"epoch": 0.352,
"grad_norm": 0.0010587095748633146,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 372470312.0,
"reward": 0.8264712929725647,
"reward_std": 0.10371551960706711,
"rewards/accuracy_reward": 0.47275390625,
"rewards/brier_reward": 0.7754327297210694,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002917947107926011,
"rewards/frontier_coverage_1": 0.13510125279426574,
"rewards/frontier_coverage_10": 0.13510125279426574,
"rewards/frontier_coverage_15": 0.13510125279426574,
"rewards/frontier_coverage_20": 0.13510125279426574,
"rewards/frontier_coverage_25": 0.13510125279426574,
"rewards/frontier_coverage_5": 0.13510125279426574,
"rewards/frontier_ece_reward": 0.015004617348313331,
"rewards/frontier_entropy_batch_reward": -0.18378140330314635,
"signal/accuracy_reward/centered_abs_mean": 0.112518310546875,
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
"signal/accuracy_reward/group_std_mean": 0.14888902008533478,
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0562591552734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0562591552734375,
"signal/advantage_abs_mean": 0.08059937804937363,
"signal/advantage_pre_scale_abs_mean": 0.08059937804937363,
"signal/advantage_pre_scale_std": 0.11854567229747773,
"signal/advantage_std": 0.11854567229747773,
"signal/brier_reward/centered_abs_mean": 0.15186150074005128,
"signal/brier_reward/group_bin_occupancy": 0.857421875,
"signal/brier_reward/group_std_mean": 0.19348691403865814,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01898268759250641,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01898268759250641,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002148870355449617,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.76796875,
"signal/frontier_aurc_reward/group_std_mean": 0.0032697335351258515,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.846477629849687e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.846477629849687e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20343527495861052,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_1/group_std_mean": 0.25838565826416016,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_10/centered_abs_mean": 0.20343527495861052,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_10/group_std_mean": 0.25838565826416016,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_15/centered_abs_mean": 0.20343527495861052,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_15/group_std_mean": 0.25838565826416016,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_20/centered_abs_mean": 0.20343527495861052,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_20/group_std_mean": 0.25838565826416016,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_25/centered_abs_mean": 0.20343527495861052,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_25/group_std_mean": 0.25838565826416016,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_5/centered_abs_mean": 0.20343527495861052,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_5/group_std_mean": 0.25838565826416016,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036414912901818752,
"signal/frontier_ece_reward/centered_abs_mean": 0.028419509530067444,
"signal/frontier_ece_reward/group_bin_occupancy": 0.628515625,
"signal/frontier_ece_reward/group_std_mean": 0.03753211200237274,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035524386912584305,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035524386912584305,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.256140798330307,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75078125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3320682287216187,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032017599791288376,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032017599791288376,
"step": 110
},
{
"calibration/aurc": 0.3685978316456159,
"calibration/batch_distribution_entropy": 0.9841220587243489,
"calibration/batch_entropy_100bins": 0.9711259650561594,
"calibration/batch_entropy_10bins": 0.9841220587243489,
"calibration/batch_entropy_50bins": 0.9806752929552862,
"calibration/batch_uniqueness": 0.9539794921875,
"calibration/buffer_distribution_entropy": 0.9960558216143583,
"calibration/buffer_entropy_100bins": 0.982010151103973,
"calibration/buffer_entropy_10bins": 0.9960558216143583,
"calibration/buffer_entropy_50bins": 0.9888810910867181,
"calibration/confidence_entropy": 0.47978596346982316,
"calibration/coverage@0%": 0.01015625,
"calibration/coverage@1%": 0.01015625,
"calibration/coverage@10%": 0.01875,
"calibration/coverage@15%": 0.069921875,
"calibration/coverage@20%": 0.158203125,
"calibration/coverage@25%": 0.3125,
"calibration/coverage@30%": 0.39921875,
"calibration/coverage@5%": 0.01015625,
"calibration/ece": 0.11309242941504012,
"calibration/mean_confidence": 0.5008828550670226,
"calibration/prompt_uniqueness": 0.862109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 633.2,
"completions/mean_length": 213.10068359375,
"completions/mean_terminated_length": 212.4544189453125,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.368,
"grad_norm": 0.0011026038555428386,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 389717935.0,
"reward": 0.8334917783737182,
"reward_std": 0.10164395570755005,
"rewards/accuracy_reward": 0.4896484375,
"rewards/brier_reward": 0.7806977868080139,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0031225197948515416,
"rewards/frontier_coverage_1": 0.1296861067414284,
"rewards/frontier_coverage_10": 0.1296861067414284,
"rewards/frontier_coverage_15": 0.1296861067414284,
"rewards/frontier_coverage_20": 0.1296861067414284,
"rewards/frontier_coverage_25": 0.1296861067414284,
"rewards/frontier_coverage_5": 0.1296861067414284,
"rewards/frontier_ece_reward": 0.01422354057431221,
"rewards/frontier_entropy_batch_reward": -0.19460689425468444,
"signal/accuracy_reward/centered_abs_mean": 0.1107421875,
"signal/accuracy_reward/group_bin_occupancy": 0.175390625,
"signal/accuracy_reward/group_std_mean": 0.14366783648729325,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05537109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05537109375,
"signal/advantage_abs_mean": 0.07986960113048554,
"signal/advantage_pre_scale_abs_mean": 0.07986960113048554,
"signal/advantage_pre_scale_std": 0.118281988799572,
"signal/advantage_std": 0.118281988799572,
"signal/brier_reward/centered_abs_mean": 0.14576351642608643,
"signal/brier_reward/group_bin_occupancy": 0.859375,
"signal/brier_reward/group_std_mean": 0.18720718026161193,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018220439553260803,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018220439553260803,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002540640765801072,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.760546875,
"signal/frontier_aurc_reward/group_std_mean": 0.0037694845348596575,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.547746866592206e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.547746866592206e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1892180174589157,
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_1/group_std_mean": 0.2430718183517456,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_10/centered_abs_mean": 0.1892180174589157,
"signal/frontier_coverage_10/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_10/group_std_mean": 0.2430718183517456,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_15/centered_abs_mean": 0.1892180174589157,
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_15/group_std_mean": 0.2430718183517456,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_20/centered_abs_mean": 0.1892180174589157,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.2430718183517456,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_25/centered_abs_mean": 0.1892180174589157,
"signal/frontier_coverage_25/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_25/group_std_mean": 0.2430718183517456,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_5/centered_abs_mean": 0.1892180174589157,
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_5/group_std_mean": 0.2430718183517456,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003387002367526293,
"signal/frontier_ece_reward/centered_abs_mean": 0.0239554800093174,
"signal/frontier_ece_reward/group_bin_occupancy": 0.63671875,
"signal/frontier_ece_reward/group_std_mean": 0.031465400382876395,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002994435001164675,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002994435001164675,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26627758145332336,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33881971836090086,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03328469768166542,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03328469768166542,
"step": 115
},
{
"calibration/aurc": 0.3288035707028756,
"calibration/batch_distribution_entropy": 0.972391171131245,
"calibration/batch_entropy_100bins": 0.9643182066779674,
"calibration/batch_entropy_10bins": 0.972391171131245,
"calibration/batch_entropy_50bins": 0.9728774568300317,
"calibration/batch_uniqueness": 0.9514887123619662,
"calibration/buffer_distribution_entropy": 0.998363598881251,
"calibration/buffer_entropy_100bins": 0.9907127922569062,
"calibration/buffer_entropy_10bins": 0.998363598881251,
"calibration/buffer_entropy_50bins": 0.9943556436848473,
"calibration/confidence_entropy": 0.467932900808412,
"calibration/coverage@0%": 0.026954653864970646,
"calibration/coverage@1%": 0.026954653864970646,
"calibration/coverage@10%": 0.17734527886497065,
"calibration/coverage@15%": 0.24492340386497063,
"calibration/coverage@20%": 0.29843902886497065,
"calibration/coverage@25%": 0.3417984038649706,
"calibration/coverage@30%": 0.3917984038649706,
"calibration/coverage@5%": 0.12304840386497065,
"calibration/ece": 0.13064203539088265,
"calibration/mean_confidence": 0.45519123283717455,
"calibration/prompt_uniqueness": 0.8569748512291884,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1157.2,
"completions/max_terminated_length": 602.4,
"completions/mean_length": 209.76728515625,
"completions/mean_terminated_length": 208.9923522949219,
"completions/min_length": 95.2,
"completions/min_terminated_length": 95.2,
"epoch": 0.384,
"grad_norm": 0.0010048962431028485,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 406722464.0,
"reward": 0.8486302971839905,
"reward_std": 0.10181694477796555,
"rewards/accuracy_reward": 0.52177734375,
"rewards/brier_reward": 0.7872754693031311,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0028854547068476676,
"rewards/frontier_coverage_1": 0.11500916741788388,
"rewards/frontier_coverage_10": 0.11500916741788388,
"rewards/frontier_coverage_15": 0.11500916741788388,
"rewards/frontier_coverage_20": 0.11500916741788388,
"rewards/frontier_coverage_25": 0.11500916741788388,
"rewards/frontier_coverage_5": 0.11500916741788388,
"rewards/frontier_ece_reward": 0.013547521643340587,
"rewards/frontier_entropy_batch_reward": -0.19455830454826356,
"signal/accuracy_reward/centered_abs_mean": 0.109857177734375,
"signal/accuracy_reward/group_bin_occupancy": 0.18359375,
"signal/accuracy_reward/group_std_mean": 0.15272603631019593,
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0549285888671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0549285888671875,
"signal/advantage_abs_mean": 0.07775198072195053,
"signal/advantage_pre_scale_abs_mean": 0.07775198072195053,
"signal/advantage_pre_scale_std": 0.11655086725950241,
"signal/advantage_std": 0.11655086725950241,
"signal/brier_reward/centered_abs_mean": 0.14183862507343292,
"signal/brier_reward/group_bin_occupancy": 0.846875,
"signal/brier_reward/group_std_mean": 0.1818255215883255,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017729828134179115,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017729828134179115,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0035306816920638085,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002797884680330753,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.762109375,
"signal/frontier_aurc_reward/group_std_mean": 0.00430455575697124,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.008213483961299e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.008213483961299e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18613593876361847,
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_1/group_std_mean": 0.23912187218666076,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_10/centered_abs_mean": 0.18613593876361847,
"signal/frontier_coverage_10/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_10/group_std_mean": 0.23912187218666076,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_15/centered_abs_mean": 0.18613593876361847,
"signal/frontier_coverage_15/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_15/group_std_mean": 0.23912187218666076,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_20/centered_abs_mean": 0.18613593876361847,
"signal/frontier_coverage_20/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_20/group_std_mean": 0.23912187218666076,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_25/centered_abs_mean": 0.18613593876361847,
"signal/frontier_coverage_25/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_25/group_std_mean": 0.23912187218666076,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_5/centered_abs_mean": 0.18613593876361847,
"signal/frontier_coverage_5/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_5/group_std_mean": 0.23912187218666076,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003331833053380251,
"signal/frontier_ece_reward/centered_abs_mean": 0.02027609832584858,
"signal/frontier_ece_reward/group_bin_occupancy": 0.671484375,
"signal/frontier_ece_reward/group_std_mean": 0.026138320937752722,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025345122907310725,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025345122907310725,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25942595601081847,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33479920625686643,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03242824450135231,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03242824450135231,
"step": 120
},
{
"calibration/aurc": 0.44117263123313394,
"calibration/batch_distribution_entropy": 0.9888037543083327,
"calibration/batch_entropy_100bins": 0.9728081260995414,
"calibration/batch_entropy_10bins": 0.9888037543083327,
"calibration/batch_entropy_50bins": 0.9824201332031917,
"calibration/batch_uniqueness": 0.95474853515625,
"calibration/buffer_distribution_entropy": 0.9988600772928141,
"calibration/buffer_entropy_100bins": 0.9957624928547298,
"calibration/buffer_entropy_10bins": 0.9988600772928141,
"calibration/buffer_entropy_50bins": 0.9972569067670085,
"calibration/confidence_entropy": 0.495948138479487,
"calibration/coverage@0%": 0.003125,
"calibration/coverage@1%": 0.003125,
"calibration/coverage@10%": 0.005859375,
"calibration/coverage@15%": 0.005859375,
"calibration/coverage@20%": 0.026953125,
"calibration/coverage@25%": 0.07890625,
"calibration/coverage@30%": 0.1234375,
"calibration/coverage@5%": 0.003125,
"calibration/ece": 0.16232330180334845,
"calibration/mean_confidence": 0.49218618777158946,
"calibration/prompt_uniqueness": 0.8701171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 940.8,
"completions/max_terminated_length": 539.8,
"completions/mean_length": 209.08955078125,
"completions/mean_terminated_length": 208.8299774169922,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.4,
"grad_norm": 0.0011378073832020164,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 423899989.0,
"reward": 0.8323954820632935,
"reward_std": 0.11341892182826996,
"rewards/accuracy_reward": 0.49619140625,
"rewards/brier_reward": 0.7690539121627807,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.003590290108695626,
"rewards/frontier_coverage_1": 0.1151683684438467,
"rewards/frontier_coverage_10": 0.1151683684438467,
"rewards/frontier_coverage_15": 0.1151683684438467,
"rewards/frontier_coverage_20": 0.1151683684438467,
"rewards/frontier_coverage_25": 0.1151683684438467,
"rewards/frontier_coverage_5": 0.1151683684438467,
"rewards/frontier_ece_reward": 0.009640791360288859,
"rewards/frontier_entropy_batch_reward": -0.20078192055225372,
"signal/accuracy_reward/centered_abs_mean": 0.135638427734375,
"signal/accuracy_reward/group_bin_occupancy": 0.18828125,
"signal/accuracy_reward/group_std_mean": 0.1780630737543106,
"signal/accuracy_reward/group_zero_std_frac": 0.49375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0678192138671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0678192138671875,
"signal/advantage_abs_mean": 0.0883943647146225,
"signal/advantage_pre_scale_abs_mean": 0.0883943647146225,
"signal/advantage_pre_scale_std": 0.13039152324199677,
"signal/advantage_std": 0.13039152324199677,
"signal/brier_reward/centered_abs_mean": 0.1547802209854126,
"signal/brier_reward/group_bin_occupancy": 0.855859375,
"signal/brier_reward/group_std_mean": 0.19734705090522767,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019347527623176576,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019347527623176576,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003553676325827837,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.737109375,
"signal/frontier_aurc_reward/group_std_mean": 0.00554114431142807,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.361080304486677e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.361080304486677e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19457192122936248,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_1/group_std_mean": 0.25038447678089143,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_10/centered_abs_mean": 0.19457192122936248,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_10/group_std_mean": 0.25038447678089143,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_15/centered_abs_mean": 0.19457192122936248,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_15/group_std_mean": 0.25038447678089143,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_20/centered_abs_mean": 0.19457192122936248,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_20/group_std_mean": 0.25038447678089143,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_25/centered_abs_mean": 0.19457192122936248,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_25/group_std_mean": 0.25038447678089143,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_5/centered_abs_mean": 0.19457192122936248,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_5/group_std_mean": 0.25038447678089143,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003482837276533246,
"signal/frontier_ece_reward/centered_abs_mean": 0.01716732941567898,
"signal/frontier_ece_reward/group_bin_occupancy": 0.703515625,
"signal/frontier_ece_reward/group_std_mean": 0.02221398986876011,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021459161769598724,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021459161769598724,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2748726367950439,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3535886824131012,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03435907959938049,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03435907959938049,
"step": 125
},
{
"calibration/aurc": 0.3402747101814266,
"calibration/batch_distribution_entropy": 0.982515497589976,
"calibration/batch_entropy_100bins": 0.9682872609351033,
"calibration/batch_entropy_10bins": 0.982515497589976,
"calibration/batch_entropy_50bins": 0.9768947206180159,
"calibration/batch_uniqueness": 0.9531341552734375,
"calibration/buffer_distribution_entropy": 0.9987716981394495,
"calibration/buffer_entropy_100bins": 0.9977724602459563,
"calibration/buffer_entropy_10bins": 0.9987716981394495,
"calibration/buffer_entropy_50bins": 0.9983235617956613,
"calibration/confidence_entropy": 0.5100973588451903,
"calibration/coverage@0%": 0.009765625,
"calibration/coverage@1%": 0.009765625,
"calibration/coverage@10%": 0.0328125,
"calibration/coverage@15%": 0.0578125,
"calibration/coverage@20%": 0.109375,
"calibration/coverage@25%": 0.216015625,
"calibration/coverage@30%": 0.336328125,
"calibration/coverage@5%": 0.0140625,
"calibration/ece": 0.11401476367502783,
"calibration/mean_confidence": 0.49950840361909454,
"calibration/prompt_uniqueness": 0.86982421875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1027.2,
"completions/max_terminated_length": 647.4,
"completions/mean_length": 209.46474609375,
"completions/mean_terminated_length": 209.2056671142578,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.416,
"grad_norm": 0.000968018255662173,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 440926092.0,
"reward": 0.8401611685752869,
"reward_std": 0.10533516258001327,
"rewards/accuracy_reward": 0.50615234375,
"rewards/brier_reward": 0.7812270641326904,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0030852626543492077,
"rewards/frontier_coverage_1": 0.11723168343305587,
"rewards/frontier_coverage_10": 0.11723168343305587,
"rewards/frontier_coverage_15": 0.11723168343305587,
"rewards/frontier_coverage_20": 0.11723168343305587,
"rewards/frontier_coverage_25": 0.11723168343305587,
"rewards/frontier_coverage_5": 0.11723168343305587,
"rewards/frontier_ece_reward": 0.008592206053435802,
"rewards/frontier_entropy_batch_reward": -0.19225098192691803,
"signal/accuracy_reward/centered_abs_mean": 0.126690673828125,
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
"signal/accuracy_reward/group_std_mean": 0.16168214678764342,
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633453369140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0633453369140625,
"signal/advantage_abs_mean": 0.08324380666017532,
"signal/advantage_pre_scale_abs_mean": 0.08324380666017532,
"signal/advantage_pre_scale_std": 0.12175452709197998,
"signal/advantage_std": 0.12175452709197998,
"signal/brier_reward/centered_abs_mean": 0.14609736502170562,
"signal/brier_reward/group_bin_occupancy": 0.859375,
"signal/brier_reward/group_std_mean": 0.1860405534505844,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018262170627713202,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018262170627713202,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029900921043008568,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74140625,
"signal/frontier_aurc_reward/group_std_mean": 0.004770136158913374,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.352264633984305e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.352264633984305e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20230944752693175,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_1/group_std_mean": 0.25601867139339446,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_10/centered_abs_mean": 0.20230944752693175,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_10/group_std_mean": 0.25601867139339446,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_15/centered_abs_mean": 0.20230944752693175,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_15/group_std_mean": 0.25601867139339446,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_20/centered_abs_mean": 0.20230944752693175,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_20/group_std_mean": 0.25601867139339446,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_25/centered_abs_mean": 0.20230944752693175,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_25/group_std_mean": 0.25601867139339446,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_5/centered_abs_mean": 0.20230944752693175,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88828125,
"signal/frontier_coverage_5/group_std_mean": 0.25601867139339446,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036213390529155733,
"signal/frontier_ece_reward/centered_abs_mean": 0.01373654417693615,
"signal/frontier_ece_reward/group_bin_occupancy": 0.695703125,
"signal/frontier_ece_reward/group_std_mean": 0.017755693942308425,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017170680221170187,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017170680221170187,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2682903289794922,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34430991411209105,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03353629112243652,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03353629112243652,
"step": 130
},
{
"calibration/aurc": 0.2828474082475755,
"calibration/batch_distribution_entropy": 0.9857383661471948,
"calibration/batch_entropy_100bins": 0.9731037393328108,
"calibration/batch_entropy_10bins": 0.9857383661471948,
"calibration/batch_entropy_50bins": 0.9798947749352924,
"calibration/batch_uniqueness": 0.9544403076171875,
"calibration/buffer_distribution_entropy": 0.998966911938475,
"calibration/buffer_entropy_100bins": 0.9983970981633485,
"calibration/buffer_entropy_10bins": 0.998966911938475,
"calibration/buffer_entropy_50bins": 0.9987812332687316,
"calibration/confidence_entropy": 0.477685401183353,
"calibration/coverage@0%": 0.015625,
"calibration/coverage@1%": 0.015625,
"calibration/coverage@10%": 0.132421875,
"calibration/coverage@15%": 0.248046875,
"calibration/coverage@20%": 0.333203125,
"calibration/coverage@25%": 0.408984375,
"calibration/coverage@30%": 0.49609375,
"calibration/coverage@5%": 0.0484375,
"calibration/ece": 0.10728726491884,
"calibration/mean_confidence": 0.5202790137028369,
"calibration/prompt_uniqueness": 0.853662109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 204.96689453125,
"completions/mean_terminated_length": 204.96689453125,
"completions/min_length": 97.2,
"completions/min_terminated_length": 97.2,
"epoch": 0.432,
"grad_norm": 0.001178718637675047,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 458039289.0,
"reward": 0.8579501390457154,
"reward_std": 0.09831726402044297,
"rewards/accuracy_reward": 0.5365234375,
"rewards/brier_reward": 0.7933999061584472,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002635437436401844,
"rewards/frontier_coverage_1": 0.1119713842868805,
"rewards/frontier_coverage_10": 0.1119713842868805,
"rewards/frontier_coverage_15": 0.1119713842868805,
"rewards/frontier_coverage_20": 0.1119713842868805,
"rewards/frontier_coverage_25": 0.11118775270879269,
"rewards/frontier_coverage_5": 0.1119713842868805,
"rewards/frontier_ece_reward": 0.00932666277512908,
"rewards/frontier_entropy_batch_reward": -0.18015409708023072,
"signal/accuracy_reward/centered_abs_mean": 0.1129638671875,
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
"signal/accuracy_reward/group_std_mean": 0.1487947881221771,
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05648193359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05648193359375,
"signal/advantage_abs_mean": 0.07683975026011466,
"signal/advantage_pre_scale_abs_mean": 0.07683975026011466,
"signal/advantage_pre_scale_std": 0.11419809311628341,
"signal/advantage_std": 0.11419809311628341,
"signal/brier_reward/centered_abs_mean": 0.13628783226013183,
"signal/brier_reward/group_bin_occupancy": 0.845703125,
"signal/brier_reward/group_std_mean": 0.1745894193649292,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01703597903251648,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01703597903251648,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002808052161708474,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375,
"signal/frontier_aurc_reward/group_std_mean": 0.004525643587112427,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0264131277799604e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0264131277799604e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18720480799674988,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.23972273170948027,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_10/centered_abs_mean": 0.18720480799674988,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.23972273170948027,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_15/centered_abs_mean": 0.18720480799674988,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_15/group_std_mean": 0.23972273170948027,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_20/centered_abs_mean": 0.18720480799674988,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_20/group_std_mean": 0.23972273170948027,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_25/centered_abs_mean": 0.182588392496109,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_25/group_std_mean": 0.2337813049554825,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032683320809155703,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032683320809155703,
"signal/frontier_coverage_5/centered_abs_mean": 0.18720480799674988,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_5/group_std_mean": 0.23972273170948027,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003350965864956379,
"signal/frontier_ece_reward/centered_abs_mean": 0.012440289743244648,
"signal/frontier_ece_reward/group_bin_occupancy": 0.68828125,
"signal/frontier_ece_reward/group_std_mean": 0.015995739214122295,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001555036217905581,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001555036217905581,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2538039118051529,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32851467132568357,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03172548897564411,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03172548897564411,
"step": 135
},
{
"calibration/aurc": 0.30295920649554897,
"calibration/batch_distribution_entropy": 0.9762808168631931,
"calibration/batch_entropy_100bins": 0.9670601797644209,
"calibration/batch_entropy_10bins": 0.9762808168631931,
"calibration/batch_entropy_50bins": 0.9758043195231133,
"calibration/batch_uniqueness": 0.9525875723374158,
"calibration/buffer_distribution_entropy": 0.9992497597924999,
"calibration/buffer_entropy_100bins": 0.9987190075433615,
"calibration/buffer_entropy_10bins": 0.9992497597924999,
"calibration/buffer_entropy_50bins": 0.9990799529370424,
"calibration/confidence_entropy": 0.5108151699420393,
"calibration/coverage@0%": 0.02461243272994129,
"calibration/coverage@1%": 0.02461243272994129,
"calibration/coverage@10%": 0.08008118272994129,
"calibration/coverage@15%": 0.11719285102739727,
"calibration/coverage@20%": 0.19414597602739728,
"calibration/coverage@25%": 0.2558815435420744,
"calibration/coverage@30%": 0.3833361362524462,
"calibration/coverage@5%": 0.05195618272994129,
"calibration/ece": 0.11912945503053644,
"calibration/mean_confidence": 0.5507951330679218,
"calibration/prompt_uniqueness": 0.8687448174102498,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1153.2,
"completions/max_terminated_length": 614.4,
"completions/mean_length": 210.47998046875,
"completions/mean_terminated_length": 210.09168395996093,
"completions/min_length": 100.6,
"completions/min_terminated_length": 100.6,
"epoch": 0.448,
"grad_norm": 0.0009407022153027356,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 475147404.0,
"reward": 0.842948317527771,
"reward_std": 0.10061811208724976,
"rewards/accuracy_reward": 0.50947265625,
"rewards/brier_reward": 0.7875774383544922,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.003255841927602887,
"rewards/frontier_coverage_1": 0.11736078262329101,
"rewards/frontier_coverage_10": 0.11736078262329101,
"rewards/frontier_coverage_15": 0.11736078262329101,
"rewards/frontier_coverage_20": 0.11736078262329101,
"rewards/frontier_coverage_25": 0.11350735127925873,
"rewards/frontier_coverage_5": 0.11736078262329101,
"rewards/frontier_ece_reward": 0.007119755912572146,
"rewards/frontier_entropy_batch_reward": -0.187257120013237,
"signal/accuracy_reward/centered_abs_mean": 0.111285400390625,
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
"signal/accuracy_reward/group_std_mean": 0.1417740285396576,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0556427001953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0556427001953125,
"signal/advantage_abs_mean": 0.07906914353370667,
"signal/advantage_pre_scale_abs_mean": 0.07906914353370667,
"signal/advantage_pre_scale_std": 0.1164934977889061,
"signal/advantage_std": 0.1164934977889061,
"signal/brier_reward/centered_abs_mean": 0.13891042470932008,
"signal/brier_reward/group_bin_occupancy": 0.863671875,
"signal/brier_reward/group_std_mean": 0.17801333963871002,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01736380308866501,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01736380308866501,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003165739495307207,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875,
"signal/frontier_aurc_reward/group_std_mean": 0.005288008600473404,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6666734599275516e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6666734599275516e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18679011166095733,
"signal/frontier_coverage_1/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_1/group_std_mean": 0.23579410016536712,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_10/centered_abs_mean": 0.18679011166095733,
"signal/frontier_coverage_10/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_10/group_std_mean": 0.23579410016536712,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_15/centered_abs_mean": 0.18679011166095733,
"signal/frontier_coverage_15/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_15/group_std_mean": 0.23579410016536712,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_20/centered_abs_mean": 0.18679011166095733,
"signal/frontier_coverage_20/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_20/group_std_mean": 0.23579410016536712,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_25/centered_abs_mean": 0.17116012871265412,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_25/group_std_mean": 0.21680428385734557,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030637661926448344,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030637661926448344,
"signal/frontier_coverage_5/centered_abs_mean": 0.18679011166095733,
"signal/frontier_coverage_5/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_5/group_std_mean": 0.23579410016536712,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003343542804941535,
"signal/frontier_ece_reward/centered_abs_mean": 0.011517700739204884,
"signal/frontier_ece_reward/group_bin_occupancy": 0.701171875,
"signal/frontier_ece_reward/group_std_mean": 0.014944654144346713,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014397125924006104,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014397125924006104,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26602200865745546,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34339261054992676,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03325275108218193,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03325275108218193,
"step": 140
},
{
"calibration/aurc": 0.4176907351056336,
"calibration/batch_distribution_entropy": 0.9858941408112836,
"calibration/batch_entropy_100bins": 0.9737808315282928,
"calibration/batch_entropy_10bins": 0.9858941408112836,
"calibration/batch_entropy_50bins": 0.9818068448300299,
"calibration/batch_uniqueness": 0.9548906392026806,
"calibration/buffer_distribution_entropy": 0.9989634378896353,
"calibration/buffer_entropy_100bins": 0.9986770693112248,
"calibration/buffer_entropy_10bins": 0.9989634378896353,
"calibration/buffer_entropy_50bins": 0.9989321635624518,
"calibration/confidence_entropy": 0.5133265562346525,
"calibration/coverage@0%": 0.003910836594911937,
"calibration/coverage@1%": 0.003910836594911937,
"calibration/coverage@10%": 0.014478351272015655,
"calibration/coverage@15%": 0.02074058219178082,
"calibration/coverage@20%": 0.06727540973581213,
"calibration/coverage@25%": 0.1275272137964775,
"calibration/coverage@30%": 0.19516725782778865,
"calibration/coverage@5%": 0.003910836594911937,
"calibration/ece": 0.13511346945023558,
"calibration/mean_confidence": 0.48324365347240567,
"calibration/prompt_uniqueness": 0.8706947413989333,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1045.6,
"completions/max_terminated_length": 775.0,
"completions/mean_length": 212.744140625,
"completions/mean_terminated_length": 212.48613586425782,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.464,
"grad_norm": 0.0010246856836602092,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 492496720.0,
"reward": 0.8157252669334412,
"reward_std": 0.09751812219619752,
"rewards/accuracy_reward": 0.45556640625,
"rewards/brier_reward": 0.7689092993736267,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003909493004903198,
"rewards/frontier_coverage_1": 0.13721695691347122,
"rewards/frontier_coverage_10": 0.13721695691347122,
"rewards/frontier_coverage_15": 0.13721695691347122,
"rewards/frontier_coverage_20": 0.13721695691347122,
"rewards/frontier_coverage_25": 0.126965943723917,
"rewards/frontier_coverage_5": 0.13721695691347122,
"rewards/frontier_ece_reward": 0.005434584524482489,
"rewards/frontier_entropy_batch_reward": -0.18550443947315215,
"signal/accuracy_reward/centered_abs_mean": 0.098260498046875,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.13415665179491043,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0491302490234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0491302490234375,
"signal/advantage_abs_mean": 0.07577944248914718,
"signal/advantage_pre_scale_abs_mean": 0.07577944248914718,
"signal/advantage_pre_scale_std": 0.11422829777002334,
"signal/advantage_std": 0.11422829777002334,
"signal/brier_reward/centered_abs_mean": 0.14183038473129272,
"signal/brier_reward/group_bin_occupancy": 0.86953125,
"signal/brier_reward/group_std_mean": 0.17996921241283417,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01772879809141159,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01772879809141159,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003397146938368678,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71171875,
"signal/frontier_aurc_reward/group_std_mean": 0.005560421571135521,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.080892926547676e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.080892926547676e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17900091111660005,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_1/group_std_mean": 0.23006309568881989,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_10/centered_abs_mean": 0.17900091111660005,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_10/group_std_mean": 0.23006309568881989,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_15/centered_abs_mean": 0.17900091111660005,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_15/group_std_mean": 0.23006309568881989,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_20/centered_abs_mean": 0.17900091111660005,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_20/group_std_mean": 0.23006309568881989,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_25/centered_abs_mean": 0.16153070628643035,
"signal/frontier_coverage_25/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_25/group_std_mean": 0.2079919010400772,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002891399711370468,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002891399711370468,
"signal/frontier_coverage_5/centered_abs_mean": 0.17900091111660005,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_5/group_std_mean": 0.23006309568881989,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032041160855442287,
"signal/frontier_ece_reward/centered_abs_mean": 0.010474135167896748,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6859375,
"signal/frontier_ece_reward/group_std_mean": 0.013670408725738525,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013092668959870934,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013092668959870934,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2555492341518402,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3300695061683655,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03194365426898003,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03194365426898003,
"step": 145
},
{
"calibration/aurc": 0.28135508395814696,
"calibration/batch_distribution_entropy": 0.9787142260851314,
"calibration/batch_entropy_100bins": 0.9686388153723096,
"calibration/batch_entropy_10bins": 0.9787142260851314,
"calibration/batch_entropy_50bins": 0.9765269277569176,
"calibration/batch_uniqueness": 0.9529508764845552,
"calibration/buffer_distribution_entropy": 0.9985915112971814,
"calibration/buffer_entropy_100bins": 0.9985480404161521,
"calibration/buffer_entropy_10bins": 0.9985915112971814,
"calibration/buffer_entropy_50bins": 0.9987196165320225,
"calibration/confidence_entropy": 0.4845285132161723,
"calibration/coverage@0%": 0.003515625,
"calibration/coverage@1%": 0.003515625,
"calibration/coverage@10%": 0.049001225490196074,
"calibration/coverage@15%": 0.09084405637254903,
"calibration/coverage@20%": 0.3448452818627451,
"calibration/coverage@25%": 0.47707720588235303,
"calibration/coverage@30%": 0.587483149509804,
"calibration/coverage@5%": 0.01371170343137255,
"calibration/ece": 0.11707550508132455,
"calibration/mean_confidence": 0.52203246818975,
"calibration/prompt_uniqueness": 0.8567325810787374,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1324.2,
"completions/max_terminated_length": 653.6,
"completions/mean_length": 213.4330078125,
"completions/mean_terminated_length": 212.65774536132812,
"completions/min_length": 95.2,
"completions/min_terminated_length": 95.2,
"epoch": 0.48,
"grad_norm": 0.0011695589637383819,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 509730306.0,
"reward": 0.8438830256462098,
"reward_std": 0.1029052346944809,
"rewards/accuracy_reward": 0.51396484375,
"rewards/brier_reward": 0.7825690031051635,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.0032245948910713195,
"rewards/frontier_coverage_1": 0.11764285415410995,
"rewards/frontier_coverage_10": 0.11764285415410995,
"rewards/frontier_coverage_15": 0.11764285415410995,
"rewards/frontier_coverage_20": 0.11763967126607895,
"rewards/frontier_coverage_25": 0.10978160202503204,
"rewards/frontier_coverage_5": 0.11764285415410995,
"rewards/frontier_ece_reward": 0.007182773388922215,
"rewards/frontier_entropy_batch_reward": -0.1905221700668335,
"signal/accuracy_reward/centered_abs_mean": 0.122174072265625,
"signal/accuracy_reward/group_bin_occupancy": 0.180859375,
"signal/accuracy_reward/group_std_mean": 0.15882102251052857,
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0610870361328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0610870361328125,
"signal/advantage_abs_mean": 0.07921299338340759,
"signal/advantage_pre_scale_abs_mean": 0.07921299338340759,
"signal/advantage_pre_scale_std": 0.1196265995502472,
"signal/advantage_std": 0.1196265995502472,
"signal/brier_reward/centered_abs_mean": 0.14188800156116485,
"signal/brier_reward/group_bin_occupancy": 0.84609375,
"signal/brier_reward/group_std_mean": 0.18255722522735596,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017736000195145606,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017736000195145606,
"signal/format_reward/centered_abs_mean": 0.001690673828125,
"signal/format_reward/group_bin_occupancy": 0.128125,
"signal/format_reward/group_std_mean": 0.004635536018759013,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032735579181462525,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716015625,
"signal/frontier_aurc_reward/group_std_mean": 0.0054481208324432375,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.859668599441648e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.859668599441648e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19356184601783752,
"signal/frontier_coverage_1/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_1/group_std_mean": 0.24662669599056244,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_10/centered_abs_mean": 0.19356184601783752,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_10/group_std_mean": 0.24662669599056244,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_15/centered_abs_mean": 0.19356184601783752,
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_15/group_std_mean": 0.24662669599056244,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_20/centered_abs_mean": 0.19355521202087403,
"signal/frontier_coverage_20/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_20/group_std_mean": 0.24661834239959718,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003464638348668814,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003464638348668814,
"signal/frontier_coverage_25/centered_abs_mean": 0.17109472453594207,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_25/group_std_mean": 0.21914859712123871,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030625955201685428,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030625955201685428,
"signal/frontier_coverage_5/centered_abs_mean": 0.19356184601783752,
"signal/frontier_coverage_5/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_5/group_std_mean": 0.24662669599056244,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034647570922970773,
"signal/frontier_ece_reward/centered_abs_mean": 0.010768765956163407,
"signal/frontier_ece_reward/group_bin_occupancy": 0.670703125,
"signal/frontier_ece_reward/group_std_mean": 0.013872439600527286,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013460957445204258,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013460957445204258,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26164351403713226,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.750390625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33615357279777525,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03270543925464153,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03270543925464153,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.501575210515319,
"eval_calibration/batch_distribution_entropy": 0.9228523420660562,
"eval_calibration/batch_entropy_100bins": 0.7070031781570842,
"eval_calibration/batch_entropy_10bins": 0.9228523420660562,
"eval_calibration/batch_entropy_50bins": 0.7799834900668859,
"eval_calibration/batch_uniqueness": 0.896484375,
"eval_calibration/buffer_distribution_entropy": 0.998397887423804,
"eval_calibration/buffer_entropy_100bins": 0.9984792581609957,
"eval_calibration/buffer_entropy_10bins": 0.998397887423804,
"eval_calibration/buffer_entropy_50bins": 0.9986163202908288,
"eval_calibration/confidence_entropy": 0.4588275516098055,
"eval_calibration/coverage@0%": 0.0703125,
"eval_calibration/coverage@1%": 0.0703125,
"eval_calibration/coverage@10%": 0.0703125,
"eval_calibration/coverage@15%": 0.1328125,
"eval_calibration/coverage@20%": 0.140625,
"eval_calibration/coverage@25%": 0.1484375,
"eval_calibration/coverage@30%": 0.15625,
"eval_calibration/coverage@5%": 0.0703125,
"eval_calibration/ece": 0.24121247296918133,
"eval_calibration/mean_confidence": 0.47050686804175734,
"eval_calibration/prompt_uniqueness": 0.896484375,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 663.5,
"eval_completions/max_terminated_length": 373.25,
"eval_completions/mean_length": 217.01414489746094,
"eval_completions/mean_terminated_length": 214.43864822387695,
"eval_completions/min_length": 119.75,
"eval_completions/min_terminated_length": 119.75,
"eval_loss": 0.0,
"eval_num_tokens": 509730306.0,
"eval_reward": 0.6953702718019485,
"eval_reward_std": 0.22602055966854095,
"eval_rewards/accuracy_reward": 0.40234375,
"eval_rewards/brier_reward": 0.7895104587078094,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.004140242876019329,
"eval_rewards/frontier_coverage_1": 0.19547064229846,
"eval_rewards/frontier_coverage_10": 0.19547064229846,
"eval_rewards/frontier_coverage_15": 0.1954706460237503,
"eval_rewards/frontier_coverage_20": 0.19546055048704147,
"eval_rewards/frontier_coverage_25": 0.16091356799006462,
"eval_rewards/frontier_coverage_5": 0.19547064229846,
"eval_rewards/frontier_ece_reward": 0.007530742208473384,
"eval_rewards/frontier_entropy_batch_reward": -0.998046875,
"eval_runtime": 29.3853,
"eval_samples_per_second": 17.015,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4638671875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.48863568156957626,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23193359375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23193359375,
"eval_signal/advantage_abs_mean": 0.20584385097026825,
"eval_signal/advantage_pre_scale_abs_mean": 0.20584385097026825,
"eval_signal/advantage_pre_scale_std": 0.2239415980875492,
"eval_signal/advantage_std": 0.2239415980875492,
"eval_signal/brier_reward/centered_abs_mean": 0.1952781230211258,
"eval_signal/brier_reward/group_bin_occupancy": 0.921875,
"eval_signal/brier_reward/group_std_mean": 0.24598337337374687,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024409765377640724,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.024409765377640724,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_bin_occupancy": 0.1328125,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0055227604461833835,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.703125,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.01063884247560054,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.88574065559078e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.88574065559078e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.33650386333465576,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4147630110383034,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.33650386333465576,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4147630110383034,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.33650386333465576,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4147630110383034,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.33647605031728745,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4147294908761978,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006022921064868569,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006022921064868569,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2806103155016899,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.34814976155757904,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005022924277000129,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005022924277000129,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.33650386333465576,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4147630110383034,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0060234187403693795,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013130403822287917,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.828125,
"eval_signal/frontier_ece_reward/group_std_mean": 0.017393003683537245,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016413004777859896,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016413004777859896,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1328125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0004730224609375,
"eval_steps_per_second": 0.136,
"step": 150
},
{
"calibration/aurc": 0.3772412508603652,
"calibration/batch_distribution_entropy": 0.9858011213469208,
"calibration/batch_entropy_100bins": 0.9743770131797682,
"calibration/batch_entropy_10bins": 0.9858011213469208,
"calibration/batch_entropy_50bins": 0.981834889265462,
"calibration/batch_uniqueness": 0.95457763671875,
"calibration/buffer_distribution_entropy": 0.9983367910852584,
"calibration/buffer_entropy_100bins": 0.9984834597266543,
"calibration/buffer_entropy_10bins": 0.9983367910852584,
"calibration/buffer_entropy_50bins": 0.9985992857477267,
"calibration/confidence_entropy": 0.49097018484416743,
"calibration/coverage@0%": 0.008984375,
"calibration/coverage@1%": 0.008984375,
"calibration/coverage@10%": 0.096484375,
"calibration/coverage@15%": 0.157421875,
"calibration/coverage@20%": 0.1921875,
"calibration/coverage@25%": 0.24140625,
"calibration/coverage@30%": 0.323046875,
"calibration/coverage@5%": 0.04375,
"calibration/ece": 0.1415806258155156,
"calibration/mean_confidence": 0.5163618468152427,
"calibration/prompt_uniqueness": 0.857763671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 549.8,
"completions/max_terminated_length": 549.8,
"completions/mean_length": 213.34248046875,
"completions/mean_terminated_length": 213.34248046875,
"completions/min_length": 96.4,
"completions/min_terminated_length": 96.4,
"epoch": 0.496,
"grad_norm": 0.0009145310032181442,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 527222773.0,
"reward": 0.8564480781555176,
"reward_std": 0.09648310244083405,
"rewards/accuracy_reward": 0.53896484375,
"rewards/brier_reward": 0.7830032706260681,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003137602610513568,
"rewards/frontier_coverage_1": 0.0957273930311203,
"rewards/frontier_coverage_10": 0.0957273930311203,
"rewards/frontier_coverage_15": 0.09572737365961075,
"rewards/frontier_coverage_20": 0.09494999200105667,
"rewards/frontier_coverage_25": 0.073183061927557,
"rewards/frontier_coverage_5": 0.0957273930311203,
"rewards/frontier_ece_reward": 0.006351951695978642,
"rewards/frontier_entropy_batch_reward": -0.17130873203277588,
"signal/accuracy_reward/centered_abs_mean": 0.099688720703125,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.13495108485221863,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0498443603515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0498443603515625,
"signal/advantage_abs_mean": 0.07473702281713486,
"signal/advantage_pre_scale_abs_mean": 0.07473702281713486,
"signal/advantage_pre_scale_std": 0.1126061201095581,
"signal/advantage_std": 0.1126061201095581,
"signal/brier_reward/centered_abs_mean": 0.13330003023147582,
"signal/brier_reward/group_bin_occupancy": 0.850390625,
"signal/brier_reward/group_std_mean": 0.17171771228313445,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016662503778934478,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016662503778934478,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003219319973140955,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708203125,
"signal/frontier_aurc_reward/group_std_mean": 0.0053937271237373356,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.762582732131705e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.762582732131705e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17066286504268646,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_1/group_std_mean": 0.2204454004764557,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003054865123704076,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003054865123704076,
"signal/frontier_coverage_10/centered_abs_mean": 0.17066286504268646,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_10/group_std_mean": 0.2204454004764557,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003054865123704076,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003054865123704076,
"signal/frontier_coverage_15/centered_abs_mean": 0.17066278159618378,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_15/group_std_mean": 0.22044530212879182,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003054863726720214,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003054863726720214,
"signal/frontier_coverage_20/centered_abs_mean": 0.16988441944122315,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8734375,
"signal/frontier_coverage_20/group_std_mean": 0.21948779225349427,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003040931047871709,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003040931047871709,
"signal/frontier_coverage_25/centered_abs_mean": 0.13763956129550933,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_25/group_std_mean": 0.17838802933692932,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024637479800730944,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024637479800730944,
"signal/frontier_coverage_5/centered_abs_mean": 0.17066286504268646,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_5/group_std_mean": 0.2204454004764557,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003054865123704076,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003054865123704076,
"signal/frontier_ece_reward/centered_abs_mean": 0.009669752418994903,
"signal/frontier_ece_reward/group_bin_occupancy": 0.671484375,
"signal/frontier_ece_reward/group_std_mean": 0.012576807662844658,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001208719052374363,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001208719052374363,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24946886897087098,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32889034748077395,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031183608621358872,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031183608621358872,
"step": 155
},
{
"calibration/aurc": 0.3124343045271322,
"calibration/batch_distribution_entropy": 0.9821167478401351,
"calibration/batch_entropy_100bins": 0.9707625540350067,
"calibration/batch_entropy_10bins": 0.9821167478401351,
"calibration/batch_entropy_50bins": 0.9800767408019752,
"calibration/batch_uniqueness": 0.953835017942229,
"calibration/buffer_distribution_entropy": 0.9983246583833173,
"calibration/buffer_entropy_100bins": 0.9985049021749475,
"calibration/buffer_entropy_10bins": 0.9983246583833173,
"calibration/buffer_entropy_50bins": 0.998608856762916,
"calibration/confidence_entropy": 0.512842485457416,
"calibration/coverage@0%": 0.02736744740704501,
"calibration/coverage@1%": 0.02736744740704501,
"calibration/coverage@10%": 0.22644936399217222,
"calibration/coverage@15%": 0.33432378302348337,
"calibration/coverage@20%": 0.3933394997553816,
"calibration/coverage@25%": 0.45234375,
"calibration/coverage@30%": 0.491796875,
"calibration/coverage@5%": 0.12906525195694715,
"calibration/ece": 0.14509790879365198,
"calibration/mean_confidence": 0.49541147618364095,
"calibration/prompt_uniqueness": 0.8591433077523414,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 916.0,
"completions/max_terminated_length": 579.6,
"completions/mean_length": 209.73173828125,
"completions/mean_terminated_length": 209.47265625,
"completions/min_length": 95.4,
"completions/min_terminated_length": 95.4,
"epoch": 0.512,
"grad_norm": 0.0008832181338220835,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 544516090.0,
"reward": 0.8589070677757263,
"reward_std": 0.09946933686733246,
"rewards/accuracy_reward": 0.5400390625,
"rewards/brier_reward": 0.7991329669952393,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0025545242242515087,
"rewards/frontier_coverage_1": 0.11153013110160828,
"rewards/frontier_coverage_10": 0.11153013110160828,
"rewards/frontier_coverage_15": 0.11152207553386688,
"rewards/frontier_coverage_20": 0.11059406325221062,
"rewards/frontier_coverage_25": 0.08714946508407592,
"rewards/frontier_coverage_5": 0.11153013110160828,
"rewards/frontier_ece_reward": 0.006665406748652458,
"rewards/frontier_entropy_batch_reward": -0.1849699854850769,
"signal/accuracy_reward/centered_abs_mean": 0.10582275390625,
"signal/accuracy_reward/group_bin_occupancy": 0.178125,
"signal/accuracy_reward/group_std_mean": 0.1435195803642273,
"signal/accuracy_reward/group_zero_std_frac": 0.575,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052911376953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052911376953125,
"signal/advantage_abs_mean": 0.0774817332625389,
"signal/advantage_pre_scale_abs_mean": 0.0774817332625389,
"signal/advantage_pre_scale_std": 0.11714037954807281,
"signal/advantage_std": 0.11714037954807281,
"signal/brier_reward/centered_abs_mean": 0.1255343437194824,
"signal/brier_reward/group_bin_occupancy": 0.844140625,
"signal/brier_reward/group_std_mean": 0.16391099691390992,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0156917929649353,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0156917929649353,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027026119641959667,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.712109375,
"signal/frontier_aurc_reward/group_std_mean": 0.004470287868753075,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8376752238254996e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8376752238254996e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16258499324321746,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_1/group_std_mean": 0.21444275677204133,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002910271333530545,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002910271333530545,
"signal/frontier_coverage_10/centered_abs_mean": 0.16258499324321746,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_10/group_std_mean": 0.21444275677204133,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002910271333530545,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002910271333530545,
"signal/frontier_coverage_15/centered_abs_mean": 0.16256897747516633,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.2144217312335968,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029099844861775635,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029099844861775635,
"signal/frontier_coverage_20/centered_abs_mean": 0.16080774068832399,
"signal/frontier_coverage_20/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_20/group_std_mean": 0.21214835047721864,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00287845847196877,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00287845847196877,
"signal/frontier_coverage_25/centered_abs_mean": 0.11882531195878983,
"signal/frontier_coverage_25/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_25/group_std_mean": 0.157957324385643,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021269729593768718,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021269729593768718,
"signal/frontier_coverage_5/centered_abs_mean": 0.16258499324321746,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_5/group_std_mean": 0.21444275677204133,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002910271333530545,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002910271333530545,
"signal/frontier_ece_reward/centered_abs_mean": 0.008545207604765893,
"signal/frontier_ece_reward/group_bin_occupancy": 0.658203125,
"signal/frontier_ece_reward/group_std_mean": 0.011142410896718502,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010681509505957366,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010681509505957366,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2622275412082672,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.720703125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34102784395217894,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0327784426510334,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0327784426510334,
"step": 160
},
{
"calibration/aurc": 0.22197753458783573,
"calibration/batch_distribution_entropy": 0.9872636635687544,
"calibration/batch_entropy_100bins": 0.9717745831016632,
"calibration/batch_entropy_10bins": 0.9872636635687544,
"calibration/batch_entropy_50bins": 0.9817316279365228,
"calibration/batch_uniqueness": 0.9543426513671875,
"calibration/buffer_distribution_entropy": 0.998607875057014,
"calibration/buffer_entropy_100bins": 0.9986924219957551,
"calibration/buffer_entropy_10bins": 0.998607875057014,
"calibration/buffer_entropy_50bins": 0.9987976735559277,
"calibration/confidence_entropy": 0.484241145901464,
"calibration/coverage@0%": 0.02734375,
"calibration/coverage@1%": 0.02734375,
"calibration/coverage@10%": 0.273046875,
"calibration/coverage@15%": 0.334375,
"calibration/coverage@20%": 0.46015625,
"calibration/coverage@25%": 0.627734375,
"calibration/coverage@30%": 0.740234375,
"calibration/coverage@5%": 0.116796875,
"calibration/ece": 0.11655803249046533,
"calibration/mean_confidence": 0.5053498338953919,
"calibration/prompt_uniqueness": 0.849365234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1177.8,
"completions/max_terminated_length": 759.8,
"completions/mean_length": 211.98857421875,
"completions/mean_terminated_length": 211.72989807128906,
"completions/min_length": 104.4,
"completions/min_terminated_length": 104.4,
"epoch": 0.528,
"grad_norm": 0.0009716249769553542,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 561716389.0,
"reward": 0.8563125967979431,
"reward_std": 0.09635183066129685,
"rewards/accuracy_reward": 0.5341796875,
"rewards/brier_reward": 0.8004718899726868,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0024792027892544866,
"rewards/frontier_coverage_1": 0.12539061158895493,
"rewards/frontier_coverage_10": 0.12539061158895493,
"rewards/frontier_coverage_15": 0.12535874992609025,
"rewards/frontier_coverage_20": 0.12457017600536346,
"rewards/frontier_coverage_25": 0.0941608265042305,
"rewards/frontier_coverage_5": 0.12539061158895493,
"rewards/frontier_ece_reward": 0.0064132180996239185,
"rewards/frontier_entropy_batch_reward": -0.19510821104049683,
"signal/accuracy_reward/centered_abs_mean": 0.11046142578125,
"signal/accuracy_reward/group_bin_occupancy": 0.176953125,
"signal/accuracy_reward/group_std_mean": 0.1455621302127838,
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.055230712890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.055230712890625,
"signal/advantage_abs_mean": 0.07561022490262985,
"signal/advantage_pre_scale_abs_mean": 0.07561022490262985,
"signal/advantage_pre_scale_std": 0.11381375342607498,
"signal/advantage_std": 0.11381375342607498,
"signal/brier_reward/centered_abs_mean": 0.12420621514320374,
"signal/brier_reward/group_bin_occupancy": 0.838671875,
"signal/brier_reward/group_std_mean": 0.16060249507427216,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015525776892900467,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015525776892900467,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002632760489359498,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.712109375,
"signal/frontier_aurc_reward/group_std_mean": 0.004449516860768199,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.712641093647107e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.712641093647107e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.178127783536911,
"signal/frontier_coverage_1/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_1/group_std_mean": 0.2289435774087906,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031884873285889627,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031884873285889627,
"signal/frontier_coverage_10/centered_abs_mean": 0.178127783536911,
"signal/frontier_coverage_10/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_10/group_std_mean": 0.2289435774087906,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031884873285889627,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031884873285889627,
"signal/frontier_coverage_15/centered_abs_mean": 0.17806898057460785,
"signal/frontier_coverage_15/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_15/group_std_mean": 0.2288702607154846,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003187434747815132,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003187434747815132,
"signal/frontier_coverage_20/centered_abs_mean": 0.17593927085399627,
"signal/frontier_coverage_20/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_20/group_std_mean": 0.22620816826820372,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003149312874302268,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003149312874302268,
"signal/frontier_coverage_25/centered_abs_mean": 0.1212777316570282,
"signal/frontier_coverage_25/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_25/group_std_mean": 0.15683144927024842,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021708713844418525,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021708713844418525,
"signal/frontier_coverage_5/centered_abs_mean": 0.178127783536911,
"signal/frontier_coverage_5/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_5/group_std_mean": 0.2289435774087906,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031884873285889627,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031884873285889627,
"signal/frontier_ece_reward/centered_abs_mean": 0.007738732825964689,
"signal/frontier_ece_reward/group_bin_occupancy": 0.648046875,
"signal/frontier_ece_reward/group_std_mean": 0.009942644834518432,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009673416032455861,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009673416032455861,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2633429080247879,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3415177345275879,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032917863503098485,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032917863503098485,
"step": 165
},
{
"calibration/aurc": 0.2246998145590191,
"calibration/batch_distribution_entropy": 0.9828267726341278,
"calibration/batch_entropy_100bins": 0.9699359138709003,
"calibration/batch_entropy_10bins": 0.9828267726341278,
"calibration/batch_entropy_50bins": 0.9776825192493742,
"calibration/batch_uniqueness": 0.9534942626953125,
"calibration/buffer_distribution_entropy": 0.9986446644020803,
"calibration/buffer_entropy_100bins": 0.9987341361318449,
"calibration/buffer_entropy_10bins": 0.9986446644020803,
"calibration/buffer_entropy_50bins": 0.9988464842427364,
"calibration/confidence_entropy": 0.4779419414178685,
"calibration/coverage@0%": 0.05390625,
"calibration/coverage@1%": 0.058984375,
"calibration/coverage@10%": 0.214453125,
"calibration/coverage@15%": 0.3390625,
"calibration/coverage@20%": 0.50390625,
"calibration/coverage@25%": 0.621875,
"calibration/coverage@30%": 0.729296875,
"calibration/coverage@5%": 0.1109375,
"calibration/ece": 0.09684584717108229,
"calibration/mean_confidence": 0.5264456085979624,
"calibration/prompt_uniqueness": 0.85,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 761.4,
"completions/max_terminated_length": 552.0,
"completions/mean_length": 216.10869140625,
"completions/mean_terminated_length": 215.97977294921876,
"completions/min_length": 102.8,
"completions/min_terminated_length": 102.8,
"epoch": 0.544,
"grad_norm": 0.0008750662091188133,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 579092926.0,
"reward": 0.865496826171875,
"reward_std": 0.10031740814447403,
"rewards/accuracy_reward": 0.56474609375,
"rewards/brier_reward": 0.7900220870971679,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002628612471744418,
"rewards/frontier_coverage_1": 0.08344129025936127,
"rewards/frontier_coverage_10": 0.08344129025936127,
"rewards/frontier_coverage_15": 0.08342344760894775,
"rewards/frontier_coverage_20": 0.08116559684276581,
"rewards/frontier_coverage_25": 0.059722674638032915,
"rewards/frontier_coverage_5": 0.08344129025936127,
"rewards/frontier_ece_reward": 0.005261074285954237,
"rewards/frontier_entropy_batch_reward": -0.19749387800693513,
"signal/accuracy_reward/centered_abs_mean": 0.117864990234375,
"signal/accuracy_reward/group_bin_occupancy": 0.1828125,
"signal/accuracy_reward/group_std_mean": 0.15820194482803346,
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0589324951171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0589324951171875,
"signal/advantage_abs_mean": 0.07773261219263077,
"signal/advantage_pre_scale_abs_mean": 0.07773261219263077,
"signal/advantage_pre_scale_std": 0.11587968170642853,
"signal/advantage_std": 0.11587968170642853,
"signal/brier_reward/centered_abs_mean": 0.1324952781200409,
"signal/brier_reward/group_bin_occupancy": 0.84609375,
"signal/brier_reward/group_std_mean": 0.17098439037799834,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01656190976500511,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01656190976500511,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027288103476166723,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6984375,
"signal/frontier_aurc_reward/group_std_mean": 0.004612684063613415,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.884570444119163e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.884570444119163e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17990356385707856,
"signal/frontier_coverage_1/group_bin_occupancy": 0.859765625,
"signal/frontier_coverage_1/group_std_mean": 0.23137963116168975,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032202736940234898,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032202736940234898,
"signal/frontier_coverage_10/centered_abs_mean": 0.17990356385707856,
"signal/frontier_coverage_10/group_bin_occupancy": 0.859765625,
"signal/frontier_coverage_10/group_std_mean": 0.23137963116168975,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032202736940234898,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032202736940234898,
"signal/frontier_coverage_15/centered_abs_mean": 0.1798170268535614,
"signal/frontier_coverage_15/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_15/group_std_mean": 0.2312684863805771,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032187245786190035,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032187245786190035,
"signal/frontier_coverage_20/centered_abs_mean": 0.17393405735492706,
"signal/frontier_coverage_20/group_bin_occupancy": 0.856640625,
"signal/frontier_coverage_20/group_std_mean": 0.22385527491569518,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031134195160120726,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031134195160120726,
"signal/frontier_coverage_25/centered_abs_mean": 0.11192511320114136,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_25/group_std_mean": 0.14527169466018677,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020034594694152475,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020034594694152475,
"signal/frontier_coverage_5/centered_abs_mean": 0.17990356385707856,
"signal/frontier_coverage_5/group_bin_occupancy": 0.859765625,
"signal/frontier_coverage_5/group_std_mean": 0.23137963116168975,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032202736940234898,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032202736940234898,
"signal/frontier_ece_reward/centered_abs_mean": 0.007925903517752885,
"signal/frontier_ece_reward/group_bin_occupancy": 0.65703125,
"signal/frontier_ece_reward/group_std_mean": 0.010128208808600903,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009907379397191107,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009907379397191107,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2713680982589722,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741796875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34780768752098085,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03392101228237152,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03392101228237152,
"step": 170
},
{
"calibration/aurc": 0.25887046651992324,
"calibration/batch_distribution_entropy": 0.9862801195014452,
"calibration/batch_entropy_100bins": 0.9684416178563928,
"calibration/batch_entropy_10bins": 0.9862801195014452,
"calibration/batch_entropy_50bins": 0.9790974340217475,
"calibration/batch_uniqueness": 0.953765869140625,
"calibration/buffer_distribution_entropy": 0.99843546825646,
"calibration/buffer_entropy_100bins": 0.998662413000743,
"calibration/buffer_entropy_10bins": 0.99843546825646,
"calibration/buffer_entropy_50bins": 0.9987507257151149,
"calibration/confidence_entropy": 0.48703887912051763,
"calibration/coverage@0%": 0.038671875,
"calibration/coverage@1%": 0.103125,
"calibration/coverage@10%": 0.23203125,
"calibration/coverage@15%": 0.319921875,
"calibration/coverage@20%": 0.375,
"calibration/coverage@25%": 0.4625,
"calibration/coverage@30%": 0.621875,
"calibration/coverage@5%": 0.170703125,
"calibration/ece": 0.11540190639652079,
"calibration/mean_confidence": 0.4965868130535844,
"calibration/prompt_uniqueness": 0.851806640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 526.8,
"completions/max_terminated_length": 526.8,
"completions/mean_length": 215.82470703125,
"completions/mean_terminated_length": 215.82470703125,
"completions/min_length": 106.2,
"completions/min_terminated_length": 106.2,
"epoch": 0.56,
"grad_norm": 0.0008583422750234604,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 596124379.0,
"reward": 0.8527642726898194,
"reward_std": 0.09059911817312241,
"rewards/accuracy_reward": 0.526953125,
"rewards/brier_reward": 0.8009752631187439,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002781519223935902,
"rewards/frontier_coverage_1": 0.12176511883735656,
"rewards/frontier_coverage_10": 0.12176511883735656,
"rewards/frontier_coverage_15": 0.12170367538928986,
"rewards/frontier_coverage_20": 0.11667201519012452,
"rewards/frontier_coverage_25": 0.07989428639411926,
"rewards/frontier_coverage_5": 0.12176511883735656,
"rewards/frontier_ece_reward": 0.00570831261575222,
"rewards/frontier_entropy_batch_reward": -0.1898701012134552,
"signal/accuracy_reward/centered_abs_mean": 0.09024658203125,
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
"signal/accuracy_reward/group_std_mean": 0.1275490090250969,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045123291015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045123291015625,
"signal/advantage_abs_mean": 0.06931805834174157,
"signal/advantage_pre_scale_abs_mean": 0.06931805834174157,
"signal/advantage_pre_scale_std": 0.10619149655103684,
"signal/advantage_std": 0.10619149655103684,
"signal/brier_reward/centered_abs_mean": 0.12430946081876755,
"signal/brier_reward/group_bin_occupancy": 0.854296875,
"signal/brier_reward/group_std_mean": 0.16047678291797637,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015538682602345944,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015538682602345944,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028530734591186045,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125,
"signal/frontier_aurc_reward/group_std_mean": 0.004656852129846812,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.107001634314656e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.107001634314656e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16476930677890778,
"signal/frontier_coverage_1/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_1/group_std_mean": 0.2144735872745514,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029493705835193394,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029493705835193394,
"signal/frontier_coverage_10/centered_abs_mean": 0.16476930677890778,
"signal/frontier_coverage_10/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_10/group_std_mean": 0.2144735872745514,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029493705835193394,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029493705835193394,
"signal/frontier_coverage_15/centered_abs_mean": 0.16467354595661163,
"signal/frontier_coverage_15/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_15/group_std_mean": 0.21435152888298034,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002947656437754631,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002947656437754631,
"signal/frontier_coverage_20/centered_abs_mean": 0.15480645895004272,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_20/group_std_mean": 0.20163175463676453,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027710356283932925,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027710356283932925,
"signal/frontier_coverage_25/centered_abs_mean": 0.09889017939567565,
"signal/frontier_coverage_25/group_bin_occupancy": 0.875390625,
"signal/frontier_coverage_25/group_std_mean": 0.1293856218457222,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017701340839266777,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017701340839266777,
"signal/frontier_coverage_5/centered_abs_mean": 0.16476930677890778,
"signal/frontier_coverage_5/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_5/group_std_mean": 0.2144735872745514,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029493705835193394,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029493705835193394,
"signal/frontier_ece_reward/centered_abs_mean": 0.007086984347552061,
"signal/frontier_ece_reward/group_bin_occupancy": 0.631640625,
"signal/frontier_ece_reward/group_std_mean": 0.009137248806655406,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008858730434440076,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008858730434440076,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25609534978866577,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3285098135471344,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03201191872358322,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03201191872358322,
"step": 175
},
{
"calibration/aurc": 0.31203197751817957,
"calibration/batch_distribution_entropy": 0.9843787104200963,
"calibration/batch_entropy_100bins": 0.9705130031573705,
"calibration/batch_entropy_10bins": 0.9843787104200963,
"calibration/batch_entropy_50bins": 0.9781790741680929,
"calibration/batch_uniqueness": 0.953887939453125,
"calibration/buffer_distribution_entropy": 0.9986194552567443,
"calibration/buffer_entropy_100bins": 0.9987917994612259,
"calibration/buffer_entropy_10bins": 0.9986194552567443,
"calibration/buffer_entropy_50bins": 0.998877645743185,
"calibration/confidence_entropy": 0.49376287006709063,
"calibration/coverage@0%": 0.011328125,
"calibration/coverage@1%": 0.011328125,
"calibration/coverage@10%": 0.125390625,
"calibration/coverage@15%": 0.211328125,
"calibration/coverage@20%": 0.283203125,
"calibration/coverage@25%": 0.368359375,
"calibration/coverage@30%": 0.49375,
"calibration/coverage@5%": 0.066015625,
"calibration/ece": 0.08653007786443709,
"calibration/mean_confidence": 0.49493694107697583,
"calibration/prompt_uniqueness": 0.851953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 759.8,
"completions/max_terminated_length": 546.6,
"completions/mean_length": 215.15576171875,
"completions/mean_terminated_length": 215.0267578125,
"completions/min_length": 105.2,
"completions/min_terminated_length": 105.2,
"epoch": 0.576,
"grad_norm": 0.000952628324739635,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 613514198.0,
"reward": 0.8436745047569275,
"reward_std": 0.09130887687206268,
"rewards/accuracy_reward": 0.51513671875,
"rewards/brier_reward": 0.7896932244300843,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003295312123373151,
"rewards/frontier_coverage_1": 0.11995794028043746,
"rewards/frontier_coverage_10": 0.11995794028043746,
"rewards/frontier_coverage_15": 0.11995095014572144,
"rewards/frontier_coverage_20": 0.11125928610563278,
"rewards/frontier_coverage_25": 0.07660634070634842,
"rewards/frontier_coverage_5": 0.11995794028043746,
"rewards/frontier_ece_reward": 0.004759292863309383,
"rewards/frontier_entropy_batch_reward": -0.19996359348297119,
"signal/accuracy_reward/centered_abs_mean": 0.090301513671875,
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
"signal/accuracy_reward/group_std_mean": 0.1269981548190117,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451507568359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451507568359375,
"signal/advantage_abs_mean": 0.06951765716075897,
"signal/advantage_pre_scale_abs_mean": 0.06951765716075897,
"signal/advantage_pre_scale_std": 0.10639693737030029,
"signal/advantage_std": 0.10639693737030029,
"signal/brier_reward/centered_abs_mean": 0.12400663793087005,
"signal/brier_reward/group_bin_occupancy": 0.833203125,
"signal/brier_reward/group_std_mean": 0.16084616780281066,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015500829741358756,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015500829741358756,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031059539876878263,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.695703125,
"signal/frontier_aurc_reward/group_std_mean": 0.005166613683104515,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5596576567040755e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5596576567040755e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16344794929027556,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_1/group_std_mean": 0.21142135560512543,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029257182497531174,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029257182497531174,
"signal/frontier_coverage_10/centered_abs_mean": 0.16344794929027556,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_10/group_std_mean": 0.21142135560512543,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029257182497531174,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029257182497531174,
"signal/frontier_coverage_15/centered_abs_mean": 0.16335625648498536,
"signal/frontier_coverage_15/group_bin_occupancy": 0.860546875,
"signal/frontier_coverage_15/group_std_mean": 0.2113026887178421,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029240769799798727,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029240769799798727,
"signal/frontier_coverage_20/centered_abs_mean": 0.15308941304683685,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86015625,
"signal/frontier_coverage_20/group_std_mean": 0.19825595915317534,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002740300307050347,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002740300307050347,
"signal/frontier_coverage_25/centered_abs_mean": 0.09508876204490661,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_25/group_std_mean": 0.12375225573778152,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017020887462422252,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017020887462422252,
"signal/frontier_coverage_5/centered_abs_mean": 0.16344794929027556,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_5/group_std_mean": 0.21142135560512543,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029257182497531174,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029257182497531174,
"signal/frontier_ece_reward/centered_abs_mean": 0.006654571555554867,
"signal/frontier_ece_reward/group_bin_occupancy": 0.634375,
"signal/frontier_ece_reward/group_std_mean": 0.008567211776971817,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008318214444443583,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008318214444443583,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2636649996042252,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72421875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3369534254074097,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03295812495052815,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03295812495052815,
"step": 180
},
{
"calibration/aurc": 0.2965379623783643,
"calibration/batch_distribution_entropy": 0.9852064048622537,
"calibration/batch_entropy_100bins": 0.9720746322070163,
"calibration/batch_entropy_10bins": 0.9852064048622537,
"calibration/batch_entropy_50bins": 0.980828527810799,
"calibration/batch_uniqueness": 0.9545549219314161,
"calibration/buffer_distribution_entropy": 0.9986850434371703,
"calibration/buffer_entropy_100bins": 0.9988615389589619,
"calibration/buffer_entropy_10bins": 0.9986850434371703,
"calibration/buffer_entropy_50bins": 0.9989465814800556,
"calibration/confidence_entropy": 0.49104615217744146,
"calibration/coverage@0%": 0.0344017551369863,
"calibration/coverage@1%": 0.0344017551369863,
"calibration/coverage@10%": 0.20481057363013697,
"calibration/coverage@15%": 0.3334041707436399,
"calibration/coverage@20%": 0.46352281066536205,
"calibration/coverage@25%": 0.5444043542074364,
"calibration/coverage@30%": 0.6159162487769081,
"calibration/coverage@5%": 0.0793503852739726,
"calibration/ece": 0.12458582109149696,
"calibration/mean_confidence": 0.493458359695666,
"calibration/prompt_uniqueness": 0.8485828157518209,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 720.4,
"completions/max_terminated_length": 523.0,
"completions/mean_length": 209.785546875,
"completions/mean_terminated_length": 209.65625915527343,
"completions/min_length": 99.2,
"completions/min_terminated_length": 99.2,
"epoch": 0.592,
"grad_norm": 0.0010041077621281147,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 630830114.0,
"reward": 0.8443014621734619,
"reward_std": 0.09108059257268905,
"rewards/accuracy_reward": 0.5109375,
"rewards/brier_reward": 0.7945732355117798,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003106560418382287,
"rewards/frontier_coverage_1": 0.1313086122274399,
"rewards/frontier_coverage_10": 0.1313086122274399,
"rewards/frontier_coverage_15": 0.13125890344381333,
"rewards/frontier_coverage_20": 0.12137791365385056,
"rewards/frontier_coverage_25": 0.07633394300937653,
"rewards/frontier_coverage_5": 0.1313086122274399,
"rewards/frontier_ece_reward": 0.005067649204283953,
"rewards/frontier_entropy_batch_reward": -0.19127190113067627,
"signal/accuracy_reward/centered_abs_mean": 0.1004638671875,
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
"signal/accuracy_reward/group_std_mean": 0.13193922638893127,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05023193359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05023193359375,
"signal/advantage_abs_mean": 0.0713750347495079,
"signal/advantage_pre_scale_abs_mean": 0.0713750347495079,
"signal/advantage_pre_scale_std": 0.10935924351215362,
"signal/advantage_std": 0.10935924351215362,
"signal/brier_reward/centered_abs_mean": 0.12105749100446701,
"signal/brier_reward/group_bin_occupancy": 0.844140625,
"signal/brier_reward/group_std_mean": 0.15631654858589172,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015132186375558377,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015132186375558377,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003012161422520876,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875,
"signal/frontier_aurc_reward/group_std_mean": 0.005084943398833275,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3917687182547525e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3917687182547525e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17011265456676483,
"signal/frontier_coverage_1/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_1/group_std_mean": 0.21760738790035247,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003045016434043646,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003045016434043646,
"signal/frontier_coverage_10/centered_abs_mean": 0.17011265456676483,
"signal/frontier_coverage_10/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_10/group_std_mean": 0.21760738790035247,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003045016434043646,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003045016434043646,
"signal/frontier_coverage_15/centered_abs_mean": 0.17000848054885864,
"signal/frontier_coverage_15/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_15/group_std_mean": 0.21747492849826813,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003043151693418622,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003043151693418622,
"signal/frontier_coverage_20/centered_abs_mean": 0.15837956964969635,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_20/group_std_mean": 0.2029614955186844,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028349942062050105,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028349942062050105,
"signal/frontier_coverage_25/centered_abs_mean": 0.09450580030679703,
"signal/frontier_coverage_25/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_25/group_std_mean": 0.12207887768745422,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016916538355872036,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016916538355872036,
"signal/frontier_coverage_5/centered_abs_mean": 0.17011265456676483,
"signal/frontier_coverage_5/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_5/group_std_mean": 0.21760738790035247,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003045016434043646,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003045016434043646,
"signal/frontier_ece_reward/centered_abs_mean": 0.006299029383808374,
"signal/frontier_ece_reward/group_bin_occupancy": 0.619140625,
"signal/frontier_ece_reward/group_std_mean": 0.008179245609790086,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007873786729760468,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007873786729760468,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26203358769416807,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33473817110061643,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03275419846177101,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03275419846177101,
"step": 185
},
{
"calibration/aurc": 0.23255238602768316,
"calibration/batch_distribution_entropy": 0.9705031192858022,
"calibration/batch_entropy_100bins": 0.9659821492408802,
"calibration/batch_entropy_10bins": 0.9705031192858022,
"calibration/batch_entropy_50bins": 0.9722374803618952,
"calibration/batch_uniqueness": 0.951416015625,
"calibration/buffer_distribution_entropy": 0.9986525167285963,
"calibration/buffer_entropy_100bins": 0.998873465649002,
"calibration/buffer_entropy_10bins": 0.9986525167285963,
"calibration/buffer_entropy_50bins": 0.9989401550363587,
"calibration/confidence_entropy": 0.4738903778069483,
"calibration/coverage@0%": 0.066015625,
"calibration/coverage@1%": 0.067578125,
"calibration/coverage@10%": 0.28515625,
"calibration/coverage@15%": 0.3921875,
"calibration/coverage@20%": 0.471875,
"calibration/coverage@25%": 0.570703125,
"calibration/coverage@30%": 0.68125,
"calibration/coverage@5%": 0.166015625,
"calibration/ece": 0.09631291270696571,
"calibration/mean_confidence": 0.4673449272543544,
"calibration/prompt_uniqueness": 0.84677734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 756.8,
"completions/max_terminated_length": 747.8,
"completions/mean_length": 205.48544921875,
"completions/mean_terminated_length": 205.35531005859374,
"completions/min_length": 100.4,
"completions/min_terminated_length": 100.4,
"epoch": 0.608,
"grad_norm": 0.000828551419544965,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 647933773.0,
"reward": 0.8504921317100524,
"reward_std": 0.08589145988225937,
"rewards/accuracy_reward": 0.51875,
"rewards/brier_reward": 0.8136067509651184,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002254353184252977,
"rewards/frontier_coverage_1": 0.1501113146543503,
"rewards/frontier_coverage_10": 0.1501113146543503,
"rewards/frontier_coverage_15": 0.1499549314379692,
"rewards/frontier_coverage_20": 0.13523284196853638,
"rewards/frontier_coverage_25": 0.08837753832340241,
"rewards/frontier_coverage_5": 0.1501113146543503,
"rewards/frontier_ece_reward": 0.005275832582265138,
"rewards/frontier_entropy_batch_reward": -0.20682401657104493,
"signal/accuracy_reward/centered_abs_mean": 0.0917724609375,
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
"signal/accuracy_reward/group_std_mean": 0.12107746154069901,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04588623046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04588623046875,
"signal/advantage_abs_mean": 0.06623770222067833,
"signal/advantage_pre_scale_abs_mean": 0.06623770222067833,
"signal/advantage_pre_scale_std": 0.10089752227067947,
"signal/advantage_std": 0.10089752227067947,
"signal/brier_reward/centered_abs_mean": 0.11607680916786194,
"signal/brier_reward/group_bin_occupancy": 0.835546875,
"signal/brier_reward/group_std_mean": 0.14877235889434814,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014509601145982742,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014509601145982742,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020517975790426135,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375,
"signal/frontier_aurc_reward/group_std_mean": 0.003500781860202551,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.672717430163175e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.672717430163175e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17476378679275512,
"signal/frontier_coverage_1/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_1/group_std_mean": 0.2204089343547821,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031282717362046243,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031282717362046243,
"signal/frontier_coverage_10/centered_abs_mean": 0.17476378679275512,
"signal/frontier_coverage_10/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_10/group_std_mean": 0.2204089343547821,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031282717362046243,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031282717362046243,
"signal/frontier_coverage_15/centered_abs_mean": 0.17451978027820586,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_15/group_std_mean": 0.22010447680950165,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003123903926461935,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003123903926461935,
"signal/frontier_coverage_20/centered_abs_mean": 0.15353093445301055,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_20/group_std_mean": 0.19391053915023804,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027482036035507917,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027482036035507917,
"signal/frontier_coverage_25/centered_abs_mean": 0.09261592626571655,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8921875,
"signal/frontier_coverage_25/group_std_mean": 0.1174243450164795,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016578249633312225,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016578249633312225,
"signal/frontier_coverage_5/centered_abs_mean": 0.17476378679275512,
"signal/frontier_coverage_5/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_5/group_std_mean": 0.2204089343547821,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031282717362046243,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031282717362046243,
"signal/frontier_ece_reward/centered_abs_mean": 0.0058389359153807165,
"signal/frontier_ece_reward/group_bin_occupancy": 0.5984375,
"signal/frontier_ece_reward/group_std_mean": 0.007358342409133911,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007298669894225896,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007298669894225896,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27921711802482607,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3562686026096344,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03490213975310326,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03490213975310326,
"step": 190
},
{
"calibration/aurc": 0.24642497306977423,
"calibration/batch_distribution_entropy": 0.9901574898144636,
"calibration/batch_entropy_100bins": 0.9759458013037052,
"calibration/batch_entropy_10bins": 0.9901574898144636,
"calibration/batch_entropy_50bins": 0.9836366792813663,
"calibration/batch_uniqueness": 0.955316162109375,
"calibration/buffer_distribution_entropy": 0.9987390546134606,
"calibration/buffer_entropy_100bins": 0.9989018115142876,
"calibration/buffer_entropy_10bins": 0.9987390546134606,
"calibration/buffer_entropy_50bins": 0.9989672677498665,
"calibration/confidence_entropy": 0.5117506475315936,
"calibration/coverage@0%": 0.030859375,
"calibration/coverage@1%": 0.030859375,
"calibration/coverage@10%": 0.20859375,
"calibration/coverage@15%": 0.33515625,
"calibration/coverage@20%": 0.439453125,
"calibration/coverage@25%": 0.546875,
"calibration/coverage@30%": 0.626171875,
"calibration/coverage@5%": 0.05625,
"calibration/ece": 0.1060162229629203,
"calibration/mean_confidence": 0.48300620790316096,
"calibration/prompt_uniqueness": 0.856640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 878.8,
"completions/max_terminated_length": 470.2,
"completions/mean_length": 202.49697265625,
"completions/mean_terminated_length": 202.23670043945313,
"completions/min_length": 101.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.624,
"grad_norm": 0.0011328250402584672,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 665351246.0,
"reward": 0.8511114478111267,
"reward_std": 0.0926138237118721,
"rewards/accuracy_reward": 0.52197265625,
"rewards/brier_reward": 0.8015788435935974,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002650001086294651,
"rewards/frontier_coverage_1": 0.1292887583374977,
"rewards/frontier_coverage_10": 0.12927541583776475,
"rewards/frontier_coverage_15": 0.1291019305586815,
"rewards/frontier_coverage_20": 0.11305647492408752,
"rewards/frontier_coverage_25": 0.07279382422566413,
"rewards/frontier_coverage_5": 0.1292887583374977,
"rewards/frontier_ece_reward": 0.004003529995679855,
"rewards/frontier_entropy_batch_reward": -0.1836717516183853,
"signal/accuracy_reward/centered_abs_mean": 0.103009033203125,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.13735188841819762,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515045166015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0515045166015625,
"signal/advantage_abs_mean": 0.07203299552202225,
"signal/advantage_pre_scale_abs_mean": 0.07203299552202225,
"signal/advantage_pre_scale_std": 0.10877863466739654,
"signal/advantage_std": 0.10877863466739654,
"signal/brier_reward/centered_abs_mean": 0.12054053992033005,
"signal/brier_reward/group_bin_occupancy": 0.860546875,
"signal/brier_reward/group_std_mean": 0.15555098354816438,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015067567490041256,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015067567490041256,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023642276879400014,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125,
"signal/frontier_aurc_reward/group_std_mean": 0.0038254653103649617,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2319672502344474e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2319672502344474e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1756080448627472,
"signal/frontier_coverage_1/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_1/group_std_mean": 0.2242843985557556,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031433838419616224,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031433838419616224,
"signal/frontier_coverage_10/centered_abs_mean": 0.1755845367908478,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_10/group_std_mean": 0.22425468266010284,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031429629772901533,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031429629772901533,
"signal/frontier_coverage_15/centered_abs_mean": 0.17519534826278688,
"signal/frontier_coverage_15/group_bin_occupancy": 0.880078125,
"signal/frontier_coverage_15/group_std_mean": 0.2237583041191101,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031359965912997724,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031359965912997724,
"signal/frontier_coverage_20/centered_abs_mean": 0.14687740206718444,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_20/group_std_mean": 0.1877150535583496,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026291054207831623,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026291054207831623,
"signal/frontier_coverage_25/centered_abs_mean": 0.08315311372280121,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90390625,
"signal/frontier_coverage_25/group_std_mean": 0.10691126137971878,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014884406700730324,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014884406700730324,
"signal/frontier_coverage_5/centered_abs_mean": 0.1756080448627472,
"signal/frontier_coverage_5/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_5/group_std_mean": 0.2242843985557556,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031433838419616224,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031433838419616224,
"signal/frontier_ece_reward/centered_abs_mean": 0.005254755448549986,
"signal/frontier_ece_reward/group_bin_occupancy": 0.621484375,
"signal/frontier_ece_reward/group_std_mean": 0.006762361247092485,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006568444310687483,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006568444310687483,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.262921079993248,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.723828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.341296112537384,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032865134999156,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032865134999156,
"step": 195
},
{
"calibration/aurc": 0.26412426599353245,
"calibration/batch_distribution_entropy": 0.9795424548247323,
"calibration/batch_entropy_100bins": 0.9705601598425802,
"calibration/batch_entropy_10bins": 0.9795424548247323,
"calibration/batch_entropy_50bins": 0.9781327762313137,
"calibration/batch_uniqueness": 0.9532878416500845,
"calibration/buffer_distribution_entropy": 0.9988867867659781,
"calibration/buffer_entropy_100bins": 0.9989921477324802,
"calibration/buffer_entropy_10bins": 0.9988867867659781,
"calibration/buffer_entropy_50bins": 0.9990694759295478,
"calibration/confidence_entropy": 0.4984931459943328,
"calibration/coverage@0%": 0.05156555772994129,
"calibration/coverage@1%": 0.08789368272994129,
"calibration/coverage@10%": 0.2625030577299413,
"calibration/coverage@15%": 0.3410431445694716,
"calibration/coverage@20%": 0.3887123899217221,
"calibration/coverage@25%": 0.5305237891389433,
"calibration/coverage@30%": 0.6500649767612525,
"calibration/coverage@5%": 0.15898743272994129,
"calibration/ece": 0.17953614494430098,
"calibration/mean_confidence": 0.5356850748597626,
"calibration/prompt_uniqueness": 0.8523553854220864,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 933.6,
"completions/max_terminated_length": 651.6,
"completions/mean_length": 207.77373046875,
"completions/mean_terminated_length": 207.12470397949218,
"completions/min_length": 98.8,
"completions/min_terminated_length": 98.8,
"epoch": 0.64,
"grad_norm": 0.0009690375300124288,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 682821537.0,
"reward": 0.8684950113296509,
"reward_std": 0.08685783594846726,
"rewards/accuracy_reward": 0.5673828125,
"rewards/brier_reward": 0.80077143907547,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0026358509669080375,
"rewards/frontier_coverage_1": 0.09513780698180199,
"rewards/frontier_coverage_10": 0.09511241912841797,
"rewards/frontier_coverage_15": 0.09505020901560783,
"rewards/frontier_coverage_20": 0.08291480019688606,
"rewards/frontier_coverage_25": 0.05603170394897461,
"rewards/frontier_coverage_5": 0.09513780698180199,
"rewards/frontier_ece_reward": 0.004073908319696784,
"rewards/frontier_entropy_batch_reward": -0.19807116389274598,
"signal/accuracy_reward/centered_abs_mean": 0.081982421875,
"signal/accuracy_reward/group_bin_occupancy": 0.1671875,
"signal/accuracy_reward/group_std_mean": 0.11289723217487335,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409912109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0409912109375,
"signal/advantage_abs_mean": 0.06701909229159356,
"signal/advantage_pre_scale_abs_mean": 0.06701909229159356,
"signal/advantage_pre_scale_std": 0.10343301296234131,
"signal/advantage_std": 0.10343301296234131,
"signal/brier_reward/centered_abs_mean": 0.11489285230636596,
"signal/brier_reward/group_bin_occupancy": 0.845703125,
"signal/brier_reward/group_std_mean": 0.14897901713848113,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014361606538295746,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014361606538295746,
"signal/format_reward/centered_abs_mean": 0.00106201171875,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.002154887933284044,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000531005859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000531005859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027433151146396993,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7265625,
"signal/frontier_aurc_reward/group_std_mean": 0.0047297993209213015,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.910533752990887e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.910533752990887e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14741408526897432,
"signal/frontier_coverage_1/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_1/group_std_mean": 0.1911382108926773,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026387120597064495,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026387120597064495,
"signal/frontier_coverage_10/centered_abs_mean": 0.14739079475402833,
"signal/frontier_coverage_10/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_10/group_std_mean": 0.19110864102840425,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026382951997220515,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026382951997220515,
"signal/frontier_coverage_15/centered_abs_mean": 0.146929270029068,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87421875,
"signal/frontier_coverage_15/group_std_mean": 0.19052064120769502,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026300338562577964,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026300338562577964,
"signal/frontier_coverage_20/centered_abs_mean": 0.11997720450162888,
"signal/frontier_coverage_20/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_20/group_std_mean": 0.15618555545806884,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002147591905668378,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002147591905668378,
"signal/frontier_coverage_25/centered_abs_mean": 0.06797240227460861,
"signal/frontier_coverage_25/group_bin_occupancy": 0.901953125,
"signal/frontier_coverage_25/group_std_mean": 0.0883466050028801,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012167059583589434,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012167059583589434,
"signal/frontier_coverage_5/centered_abs_mean": 0.14741408526897432,
"signal/frontier_coverage_5/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_5/group_std_mean": 0.1911382108926773,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026387120597064495,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026387120597064495,
"signal/frontier_ece_reward/centered_abs_mean": 0.005082414392381907,
"signal/frontier_ece_reward/group_bin_occupancy": 0.602734375,
"signal/frontier_ece_reward/group_std_mean": 0.006547214556485414,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006353017990477384,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006353017990477384,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2626469016075134,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3328437089920044,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03283086270093918,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03283086270093918,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.4118891587153638,
"eval_calibration/batch_distribution_entropy": 0.9499651057906557,
"eval_calibration/batch_entropy_100bins": 0.7164103655215839,
"eval_calibration/batch_entropy_10bins": 0.9499651057906557,
"eval_calibration/batch_entropy_50bins": 0.8018192330251381,
"eval_calibration/batch_uniqueness": 0.90625,
"eval_calibration/buffer_distribution_entropy": 0.9989505087605473,
"eval_calibration/buffer_entropy_100bins": 0.9990211296746674,
"eval_calibration/buffer_entropy_10bins": 0.9989505087605473,
"eval_calibration/buffer_entropy_50bins": 0.9991013446788948,
"eval_calibration/confidence_entropy": 0.512406467521397,
"eval_calibration/coverage@0%": 0.03125,
"eval_calibration/coverage@1%": 0.03125,
"eval_calibration/coverage@10%": 0.03125,
"eval_calibration/coverage@15%": 0.1484375,
"eval_calibration/coverage@20%": 0.3046875,
"eval_calibration/coverage@25%": 0.3828125,
"eval_calibration/coverage@30%": 0.4375,
"eval_calibration/coverage@5%": 0.03125,
"eval_calibration/ece": 0.20437410058850125,
"eval_calibration/mean_confidence": 0.4736161076469313,
"eval_calibration/prompt_uniqueness": 0.90625,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 420.0,
"eval_completions/max_terminated_length": 420.0,
"eval_completions/mean_length": 208.73026657104492,
"eval_completions/mean_terminated_length": 208.73026657104492,
"eval_completions/min_length": 124.75,
"eval_completions/min_terminated_length": 124.75,
"eval_loss": 0.0,
"eval_num_tokens": 682821537.0,
"eval_reward": 0.7161590754985809,
"eval_reward_std": 0.22477618232369423,
"eval_rewards/accuracy_reward": 0.44921875,
"eval_rewards/brier_reward": 0.7985615581274033,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.003726774360984564,
"eval_rewards/frontier_coverage_1": 0.17506344616413116,
"eval_rewards/frontier_coverage_10": 0.17506344616413116,
"eval_rewards/frontier_coverage_15": 0.17469647899270058,
"eval_rewards/frontier_coverage_20": 0.13535447604954243,
"eval_rewards/frontier_coverage_25": 0.07677387073636055,
"eval_rewards/frontier_coverage_5": 0.17506344616413116,
"eval_rewards/frontier_ece_reward": 0.0037689171731472015,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 21.9268,
"eval_samples_per_second": 22.803,
"eval_signal/accuracy_reward/centered_abs_mean": 0.474609375,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4946126714348793,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2373046875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2373046875,
"eval_signal/advantage_abs_mean": 0.20923983305692673,
"eval_signal/advantage_pre_scale_abs_mean": 0.20923983305692673,
"eval_signal/advantage_pre_scale_std": 0.22233285754919052,
"eval_signal/advantage_std": 0.22233285754919052,
"eval_signal/brier_reward/centered_abs_mean": 0.182050883769989,
"eval_signal/brier_reward/group_bin_occupancy": 0.8828125,
"eval_signal/brier_reward/group_std_mean": 0.23487457260489464,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022756360471248627,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022756360471248627,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00498336530290544,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6171875,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009891956811770797,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.920223444874864e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.920223444874864e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3510345071554184,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4272778555750847,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006283517461270094,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006283517461270094,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3510345071554184,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4272778555750847,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006283517461270094,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006283517461270094,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3502937853336334,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_15/group_std_mean": 0.42644187808036804,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0062702588038519025,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0062702588038519025,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2606714144349098,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_20/group_std_mean": 0.32316891103982925,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004666018299758434,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004666018299758434,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.12922955304384232,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.16818556562066078,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023132089991122484,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023132089991122484,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3510345071554184,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4272778555750847,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006283517461270094,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006283517461270094,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006779930088669062,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.859375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0086629968136549,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008474912610836327,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008474912610836327,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.182,
"step": 200
},
{
"calibration/aurc": 0.4163644231985885,
"calibration/batch_distribution_entropy": 0.9749810507515579,
"calibration/batch_entropy_100bins": 0.966999722833614,
"calibration/batch_entropy_10bins": 0.9749810507515579,
"calibration/batch_entropy_50bins": 0.9740783954275158,
"calibration/batch_uniqueness": 0.9524871826171875,
"calibration/buffer_distribution_entropy": 0.9990417591124992,
"calibration/buffer_entropy_100bins": 0.9990906778413995,
"calibration/buffer_entropy_10bins": 0.9990417591124992,
"calibration/buffer_entropy_50bins": 0.9991687120016991,
"calibration/confidence_entropy": 0.5268865931440982,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.009375,
"calibration/coverage@15%": 0.014453125,
"calibration/coverage@20%": 0.0734375,
"calibration/coverage@25%": 0.187109375,
"calibration/coverage@30%": 0.301953125,
"calibration/coverage@5%": 0.005078125,
"calibration/ece": 0.09387041510133062,
"calibration/mean_confidence": 0.47938372087901754,
"calibration/prompt_uniqueness": 0.85546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 676.0,
"completions/max_terminated_length": 459.2,
"completions/mean_length": 204.43974609375,
"completions/mean_terminated_length": 204.30895385742187,
"completions/min_length": 99.8,
"completions/min_terminated_length": 99.8,
"epoch": 0.656,
"grad_norm": 0.001068526296876371,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 699771544.0,
"reward": 0.8357076287269593,
"reward_std": 0.09418870508670807,
"rewards/accuracy_reward": 0.49921875,
"rewards/brier_reward": 0.7890438675880432,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003022062359377742,
"rewards/frontier_coverage_1": 0.1237358182668686,
"rewards/frontier_coverage_10": 0.1237358182668686,
"rewards/frontier_coverage_15": 0.12354113608598709,
"rewards/frontier_coverage_20": 0.09611983597278595,
"rewards/frontier_coverage_25": 0.058796758949756625,
"rewards/frontier_coverage_5": 0.1237358182668686,
"rewards/frontier_ece_reward": 0.0032737540546804666,
"rewards/frontier_entropy_batch_reward": -0.19495902359485626,
"signal/accuracy_reward/centered_abs_mean": 0.09501953125,
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
"signal/accuracy_reward/group_std_mean": 0.1296718657016754,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047509765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047509765625,
"signal/advantage_abs_mean": 0.07281130701303482,
"signal/advantage_pre_scale_abs_mean": 0.07281130701303482,
"signal/advantage_pre_scale_std": 0.11112865060567856,
"signal/advantage_std": 0.11112865060567856,
"signal/brier_reward/centered_abs_mean": 0.12090405225753784,
"signal/brier_reward/group_bin_occupancy": 0.863671875,
"signal/brier_reward/group_std_mean": 0.15587877333164216,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01511300653219223,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01511300653219223,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002597982669249177,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875,
"signal/frontier_aurc_reward/group_std_mean": 0.0043062128126621245,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6503888734150677e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6503888734150677e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15984987318515778,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_1/group_std_mean": 0.20615570545196532,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002861312637105584,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002861312637105584,
"signal/frontier_coverage_10/centered_abs_mean": 0.15984987318515778,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_10/group_std_mean": 0.20615570545196532,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002861312637105584,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002861312637105584,
"signal/frontier_coverage_15/centered_abs_mean": 0.1595711052417755,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_15/group_std_mean": 0.205799001455307,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002856322703883052,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002856322703883052,
"signal/frontier_coverage_20/centered_abs_mean": 0.11935619711875915,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8671875,
"signal/frontier_coverage_20/group_std_mean": 0.15463128089904785,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002136475685983896,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002136475685983896,
"signal/frontier_coverage_25/centered_abs_mean": 0.06763988435268402,
"signal/frontier_coverage_25/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_25/group_std_mean": 0.0881109967827797,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012107538990676404,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012107538990676404,
"signal/frontier_coverage_5/centered_abs_mean": 0.15984987318515778,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_5/group_std_mean": 0.20615570545196532,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002861312637105584,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002861312637105584,
"signal/frontier_ece_reward/centered_abs_mean": 0.004474427737295628,
"signal/frontier_ece_reward/group_bin_occupancy": 0.625390625,
"signal/frontier_ece_reward/group_std_mean": 0.005852994229644537,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005593034671619535,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005593034671619535,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2663916915655136,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34035165309906007,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0332989614456892,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0332989614456892,
"step": 205
},
{
"calibration/aurc": 0.28483097130697954,
"calibration/batch_distribution_entropy": 0.9815272458034034,
"calibration/batch_entropy_100bins": 0.9706086431974328,
"calibration/batch_entropy_10bins": 0.9815272458034034,
"calibration/batch_entropy_50bins": 0.9791595614288557,
"calibration/batch_uniqueness": 0.953887939453125,
"calibration/buffer_distribution_entropy": 0.9991894083094129,
"calibration/buffer_entropy_100bins": 0.9991681415606204,
"calibration/buffer_entropy_10bins": 0.9991894083094129,
"calibration/buffer_entropy_50bins": 0.9992375331871519,
"calibration/confidence_entropy": 0.5007356086799657,
"calibration/coverage@0%": 0.0390625,
"calibration/coverage@1%": 0.0390625,
"calibration/coverage@10%": 0.17421875,
"calibration/coverage@15%": 0.223828125,
"calibration/coverage@20%": 0.317578125,
"calibration/coverage@25%": 0.3875,
"calibration/coverage@30%": 0.490625,
"calibration/coverage@5%": 0.073046875,
"calibration/ece": 0.11163271809879152,
"calibration/mean_confidence": 0.48953429901749335,
"calibration/prompt_uniqueness": 0.85048828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 885.8,
"completions/max_terminated_length": 472.8,
"completions/mean_length": 202.37763671875,
"completions/mean_terminated_length": 202.11712341308595,
"completions/min_length": 93.8,
"completions/min_terminated_length": 93.8,
"epoch": 0.672,
"grad_norm": 0.0008770785643719137,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 716757331.0,
"reward": 0.8457530260086059,
"reward_std": 0.08760204017162324,
"rewards/accuracy_reward": 0.51572265625,
"rewards/brier_reward": 0.799196469783783,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0027179742231965066,
"rewards/frontier_coverage_1": 0.13748425543308257,
"rewards/frontier_coverage_10": 0.13748425543308257,
"rewards/frontier_coverage_15": 0.13730760663747787,
"rewards/frontier_coverage_20": 0.10792672708630562,
"rewards/frontier_coverage_25": 0.06943527311086654,
"rewards/frontier_coverage_5": 0.13748425543308257,
"rewards/frontier_ece_reward": 0.003610169980674982,
"rewards/frontier_entropy_batch_reward": -0.20262654721736909,
"signal/accuracy_reward/centered_abs_mean": 0.102716064453125,
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
"signal/accuracy_reward/group_std_mean": 0.131059630215168,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0513580322265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0513580322265625,
"signal/advantage_abs_mean": 0.06931476294994354,
"signal/advantage_pre_scale_abs_mean": 0.06931476294994354,
"signal/advantage_pre_scale_std": 0.1052887812256813,
"signal/advantage_std": 0.1052887812256813,
"signal/brier_reward/centered_abs_mean": 0.1212724655866623,
"signal/brier_reward/group_bin_occupancy": 0.839453125,
"signal/brier_reward/group_std_mean": 0.15539450347423553,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015159058198332787,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015159058198332787,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024694956839084624,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71484375,
"signal/frontier_aurc_reward/group_std_mean": 0.004247998539358378,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.420397090143524e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.420397090143524e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1761551171541214,
"signal/frontier_coverage_1/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_1/group_std_mean": 0.2236780822277069,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003153176372870803,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003153176372870803,
"signal/frontier_coverage_10/centered_abs_mean": 0.1761551171541214,
"signal/frontier_coverage_10/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_10/group_std_mean": 0.2236780822277069,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003153176372870803,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003153176372870803,
"signal/frontier_coverage_15/centered_abs_mean": 0.17571605741977692,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_15/group_std_mean": 0.22313523888587952,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031453173141926527,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031453173141926527,
"signal/frontier_coverage_20/centered_abs_mean": 0.1281582921743393,
"signal/frontier_coverage_20/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_20/group_std_mean": 0.1638483375310898,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022940333001315594,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022940333001315594,
"signal/frontier_coverage_25/centered_abs_mean": 0.07442445904016495,
"signal/frontier_coverage_25/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_25/group_std_mean": 0.09590905010700226,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013321977807208897,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013321977807208897,
"signal/frontier_coverage_5/centered_abs_mean": 0.1761551171541214,
"signal/frontier_coverage_5/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_5/group_std_mean": 0.2236780822277069,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003153176372870803,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003153176372870803,
"signal/frontier_ece_reward/centered_abs_mean": 0.004578849300742149,
"signal/frontier_ece_reward/group_bin_occupancy": 0.598046875,
"signal/frontier_ece_reward/group_std_mean": 0.005887005571275949,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005723561625927687,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005723561625927687,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2648188531398773,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.721875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34055405855178833,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03310235664248466,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03310235664248466,
"step": 210
},
{
"calibration/aurc": 0.34530715915389093,
"calibration/batch_distribution_entropy": 0.9766022674114094,
"calibration/batch_entropy_100bins": 0.9668047432555472,
"calibration/batch_entropy_10bins": 0.9766022674114094,
"calibration/batch_entropy_50bins": 0.9743620147547288,
"calibration/batch_uniqueness": 0.952294921875,
"calibration/buffer_distribution_entropy": 0.9992386461143514,
"calibration/buffer_entropy_100bins": 0.9992085238697724,
"calibration/buffer_entropy_10bins": 0.9992386461143514,
"calibration/buffer_entropy_50bins": 0.9992727921396962,
"calibration/confidence_entropy": 0.5042982261283598,
"calibration/coverage@0%": 0.0109375,
"calibration/coverage@1%": 0.0109375,
"calibration/coverage@10%": 0.091796875,
"calibration/coverage@15%": 0.215234375,
"calibration/coverage@20%": 0.3265625,
"calibration/coverage@25%": 0.392578125,
"calibration/coverage@30%": 0.5296875,
"calibration/coverage@5%": 0.0171875,
"calibration/ece": 0.12370065837025182,
"calibration/mean_confidence": 0.49322062629819213,
"calibration/prompt_uniqueness": 0.851513671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 444.4,
"completions/max_terminated_length": 444.4,
"completions/mean_length": 202.21005859375,
"completions/mean_terminated_length": 202.21005859375,
"completions/min_length": 95.4,
"completions/min_terminated_length": 95.4,
"epoch": 0.688,
"grad_norm": 0.001138906111009419,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 733781882.0,
"reward": 0.8525039792060852,
"reward_std": 0.08989884555339814,
"rewards/accuracy_reward": 0.5345703125,
"rewards/brier_reward": 0.7973044633865356,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002551911678165197,
"rewards/frontier_coverage_1": 0.11433582976460457,
"rewards/frontier_coverage_10": 0.11433582976460457,
"rewards/frontier_coverage_15": 0.11383322924375534,
"rewards/frontier_coverage_20": 0.08399821668863297,
"rewards/frontier_coverage_25": 0.054333243519067764,
"rewards/frontier_coverage_5": 0.11433582976460457,
"rewards/frontier_ece_reward": 0.003297937847673893,
"rewards/frontier_entropy_batch_reward": -0.20293367207050322,
"signal/accuracy_reward/centered_abs_mean": 0.100048828125,
"signal/accuracy_reward/group_bin_occupancy": 0.173046875,
"signal/accuracy_reward/group_std_mean": 0.13349073976278306,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0500244140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0500244140625,
"signal/advantage_abs_mean": 0.06954181641340255,
"signal/advantage_pre_scale_abs_mean": 0.06954181641340255,
"signal/advantage_pre_scale_std": 0.10719988644123077,
"signal/advantage_std": 0.10719988644123077,
"signal/brier_reward/centered_abs_mean": 0.11443150490522384,
"signal/brier_reward/group_bin_occupancy": 0.85390625,
"signal/brier_reward/group_std_mean": 0.1465883433818817,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01430393811315298,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01430393811315298,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002246859250590205,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.731640625,
"signal/frontier_aurc_reward/group_std_mean": 0.0038079099263995885,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0218777576228605e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0218777576228605e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1653559386730194,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.21095694303512574,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002959871245548129,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002959871245548129,
"signal/frontier_coverage_10/centered_abs_mean": 0.1653559386730194,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.21095694303512574,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002959871245548129,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002959871245548129,
"signal/frontier_coverage_15/centered_abs_mean": 0.1643199324607849,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.20968802869319916,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029413266573101284,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029413266573101284,
"signal/frontier_coverage_20/centered_abs_mean": 0.11405820548534393,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_20/group_std_mean": 0.14639358520507811,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020416418788954615,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020416418788954615,
"signal/frontier_coverage_25/centered_abs_mean": 0.0672955259680748,
"signal/frontier_coverage_25/group_bin_occupancy": 0.915234375,
"signal/frontier_coverage_25/group_std_mean": 0.08634553998708724,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012045898474752903,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012045898474752903,
"signal/frontier_coverage_5/centered_abs_mean": 0.1653559386730194,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_5/group_std_mean": 0.21095694303512574,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002959871245548129,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002959871245548129,
"signal/frontier_ece_reward/centered_abs_mean": 0.0043392408639192585,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6078125,
"signal/frontier_ece_reward/group_std_mean": 0.005523344594985246,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005424051079899073,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005424051079899073,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2766480267047882,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3514810025691986,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03458100333809853,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03458100333809853,
"step": 215
},
{
"calibration/aurc": 0.26433509062939187,
"calibration/batch_distribution_entropy": 0.978364776360986,
"calibration/batch_entropy_100bins": 0.9685694678027698,
"calibration/batch_entropy_10bins": 0.978364776360986,
"calibration/batch_entropy_50bins": 0.9765817547053196,
"calibration/batch_uniqueness": 0.9531219482421875,
"calibration/buffer_distribution_entropy": 0.9992599704743942,
"calibration/buffer_entropy_100bins": 0.999201034721138,
"calibration/buffer_entropy_10bins": 0.9992599704743942,
"calibration/buffer_entropy_50bins": 0.999264548139028,
"calibration/confidence_entropy": 0.48513173016721006,
"calibration/coverage@0%": 0.012890625,
"calibration/coverage@1%": 0.012890625,
"calibration/coverage@10%": 0.069921875,
"calibration/coverage@15%": 0.165625,
"calibration/coverage@20%": 0.28046875,
"calibration/coverage@25%": 0.505859375,
"calibration/coverage@30%": 0.727734375,
"calibration/coverage@5%": 0.025,
"calibration/ece": 0.0904218188804096,
"calibration/mean_confidence": 0.5354777583629338,
"calibration/prompt_uniqueness": 0.843310546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 639.6,
"completions/max_terminated_length": 421.0,
"completions/mean_length": 197.9205078125,
"completions/mean_terminated_length": 197.79017944335936,
"completions/min_length": 93.2,
"completions/min_terminated_length": 93.2,
"epoch": 0.704,
"grad_norm": 0.0008411157759837806,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 750674732.0,
"reward": 0.8568742513656616,
"reward_std": 0.09010809510946274,
"rewards/accuracy_reward": 0.53876953125,
"rewards/brier_reward": 0.8050263285636902,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0029557050904259084,
"rewards/frontier_coverage_1": 0.11192709654569626,
"rewards/frontier_coverage_10": 0.11192709654569626,
"rewards/frontier_coverage_15": 0.11145668923854828,
"rewards/frontier_coverage_20": 0.08219068348407746,
"rewards/frontier_coverage_25": 0.055270007252693175,
"rewards/frontier_coverage_5": 0.11192709654569626,
"rewards/frontier_ece_reward": 0.0034165045712143184,
"rewards/frontier_entropy_batch_reward": -0.1914419263601303,
"signal/accuracy_reward/centered_abs_mean": 0.092047119140625,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.1208561822772026,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0460235595703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0460235595703125,
"signal/advantage_abs_mean": 0.07105211615562439,
"signal/advantage_pre_scale_abs_mean": 0.07105211615562439,
"signal/advantage_pre_scale_std": 0.10825964659452439,
"signal/advantage_std": 0.10825964659452439,
"signal/brier_reward/centered_abs_mean": 0.11719977408647538,
"signal/brier_reward/group_bin_occupancy": 0.84921875,
"signal/brier_reward/group_std_mean": 0.15045890510082244,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014649971760809422,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014649971760809422,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028852388728410005,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_aurc_reward/group_std_mean": 0.004860749281942844,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1645773783093316e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1645773783093316e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1517467588186264,
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_1/group_std_mean": 0.1936959743499756,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027162669226527213,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027162669226527213,
"signal/frontier_coverage_10/centered_abs_mean": 0.1517467588186264,
"signal/frontier_coverage_10/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_10/group_std_mean": 0.1936959743499756,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027162669226527213,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027162669226527213,
"signal/frontier_coverage_15/centered_abs_mean": 0.15077128112316132,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_15/group_std_mean": 0.1924582153558731,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026988058350980284,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026988058350980284,
"signal/frontier_coverage_20/centered_abs_mean": 0.09997670203447342,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_20/group_std_mean": 0.1284783437848091,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017895829398185015,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017895829398185015,
"signal/frontier_coverage_25/centered_abs_mean": 0.06153928935527801,
"signal/frontier_coverage_25/group_bin_occupancy": 0.922265625,
"signal/frontier_coverage_25/group_std_mean": 0.07910384982824326,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011015532538294793,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011015532538294793,
"signal/frontier_coverage_5/centered_abs_mean": 0.1517467588186264,
"signal/frontier_coverage_5/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_5/group_std_mean": 0.1936959743499756,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027162669226527213,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027162669226527213,
"signal/frontier_ece_reward/centered_abs_mean": 0.004455319605767727,
"signal/frontier_ece_reward/group_bin_occupancy": 0.625,
"signal/frontier_ece_reward/group_std_mean": 0.005671911407262087,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005569149507209659,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005569149507209659,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26655210852622985,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34124083518981935,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03331901356577873,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03331901356577873,
"step": 220
},
{
"calibration/aurc": 0.2571725886157596,
"calibration/batch_distribution_entropy": 0.9868377117637062,
"calibration/batch_entropy_100bins": 0.9723589233248291,
"calibration/batch_entropy_10bins": 0.9868377117637062,
"calibration/batch_entropy_50bins": 0.9812132345480924,
"calibration/batch_uniqueness": 0.9542327880859375,
"calibration/buffer_distribution_entropy": 0.999290995119787,
"calibration/buffer_entropy_100bins": 0.9992192079587326,
"calibration/buffer_entropy_10bins": 0.999290995119787,
"calibration/buffer_entropy_50bins": 0.9992962849096898,
"calibration/confidence_entropy": 0.4974882722837635,
"calibration/coverage@0%": 0.0703125,
"calibration/coverage@1%": 0.09921875,
"calibration/coverage@10%": 0.24921875,
"calibration/coverage@15%": 0.331640625,
"calibration/coverage@20%": 0.378125,
"calibration/coverage@25%": 0.46640625,
"calibration/coverage@30%": 0.598046875,
"calibration/coverage@5%": 0.165234375,
"calibration/ece": 0.12935237041983996,
"calibration/mean_confidence": 0.5272114849619792,
"calibration/prompt_uniqueness": 0.843505859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 385.6,
"completions/max_terminated_length": 385.6,
"completions/mean_length": 191.19853515625,
"completions/mean_terminated_length": 191.19853515625,
"completions/min_length": 92.4,
"completions/min_terminated_length": 92.4,
"epoch": 0.72,
"grad_norm": 0.0011021590325981379,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 767642461.0,
"reward": 0.8635419249534607,
"reward_std": 0.0918789803981781,
"rewards/accuracy_reward": 0.55908203125,
"rewards/brier_reward": 0.8094393730163574,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002607145463116467,
"rewards/frontier_coverage_1": 0.10515292584896088,
"rewards/frontier_coverage_10": 0.10515292584896088,
"rewards/frontier_coverage_15": 0.10439955592155456,
"rewards/frontier_coverage_20": 0.0752902314066887,
"rewards/frontier_coverage_25": 0.053488964587450026,
"rewards/frontier_coverage_5": 0.10515292584896088,
"rewards/frontier_ece_reward": 0.00315559939481318,
"rewards/frontier_entropy_batch_reward": -0.2187791347503662,
"signal/accuracy_reward/centered_abs_mean": 0.093731689453125,
"signal/accuracy_reward/group_bin_occupancy": 0.171484375,
"signal/accuracy_reward/group_std_mean": 0.12776783406734465,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468658447265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0468658447265625,
"signal/advantage_abs_mean": 0.07090412229299545,
"signal/advantage_pre_scale_abs_mean": 0.07090412229299545,
"signal/advantage_pre_scale_std": 0.1082430675625801,
"signal/advantage_std": 0.1082430675625801,
"signal/brier_reward/centered_abs_mean": 0.10838331580162049,
"signal/brier_reward/group_bin_occupancy": 0.851953125,
"signal/brier_reward/group_std_mean": 0.14058519005775452,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01354791447520256,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01354791447520256,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024020272307097913,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125,
"signal/frontier_aurc_reward/group_std_mean": 0.00396113651804626,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2996287811547516e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2996287811547516e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14759007096290588,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.19091827869415284,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002641862211748958,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002641862211748958,
"signal/frontier_coverage_10/centered_abs_mean": 0.14759007096290588,
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
"signal/frontier_coverage_10/group_std_mean": 0.19091827869415284,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002641862211748958,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641862211748958,
"signal/frontier_coverage_15/centered_abs_mean": 0.1430205911397934,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_15/group_std_mean": 0.1849964141845703,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025600686203688383,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025600686203688383,
"signal/frontier_coverage_20/centered_abs_mean": 0.09058674424886703,
"signal/frontier_coverage_20/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_20/group_std_mean": 0.11796402931213379,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016215026378631591,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016215026378631591,
"signal/frontier_coverage_25/centered_abs_mean": 0.056336529552936554,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923046875,
"signal/frontier_coverage_25/group_std_mean": 0.07268869429826737,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010084238601848483,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010084238601848483,
"signal/frontier_coverage_5/centered_abs_mean": 0.14759007096290588,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.19091827869415284,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002641862211748958,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002641862211748958,
"signal/frontier_ece_reward/centered_abs_mean": 0.00401081838645041,
"signal/frontier_ece_reward/group_bin_occupancy": 0.633203125,
"signal/frontier_ece_reward/group_std_mean": 0.0050904926843941215,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005013522983063013,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005013522983063013,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2880380153656006,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35947364568710327,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03600475192070007,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03600475192070007,
"step": 225
},
{
"calibration/aurc": 0.2671787874657409,
"calibration/batch_distribution_entropy": 0.9745606845119091,
"calibration/batch_entropy_100bins": 0.9669656345831037,
"calibration/batch_entropy_10bins": 0.9745606845119091,
"calibration/batch_entropy_50bins": 0.974810522331128,
"calibration/batch_uniqueness": 0.952349853515625,
"calibration/buffer_distribution_entropy": 0.9992588263367409,
"calibration/buffer_entropy_100bins": 0.9992122785372862,
"calibration/buffer_entropy_10bins": 0.9992588263367409,
"calibration/buffer_entropy_50bins": 0.9993063227006909,
"calibration/confidence_entropy": 0.4825361471276001,
"calibration/coverage@0%": 0.01875,
"calibration/coverage@1%": 0.01875,
"calibration/coverage@10%": 0.08828125,
"calibration/coverage@15%": 0.25625,
"calibration/coverage@20%": 0.40859375,
"calibration/coverage@25%": 0.503125,
"calibration/coverage@30%": 0.637109375,
"calibration/coverage@5%": 0.023828125,
"calibration/ece": 0.12239777459769277,
"calibration/mean_confidence": 0.5535488767952312,
"calibration/prompt_uniqueness": 0.839697265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 415.8,
"completions/max_terminated_length": 415.8,
"completions/mean_length": 190.82607421875,
"completions/mean_terminated_length": 190.82607421875,
"completions/min_length": 92.6,
"completions/min_terminated_length": 92.6,
"epoch": 0.736,
"grad_norm": 0.0011723111383616924,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 784536104.0,
"reward": 0.8648198366165161,
"reward_std": 0.09010217189788819,
"rewards/accuracy_reward": 0.55693359375,
"rewards/brier_reward": 0.8001532912254333,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0029186454601585867,
"rewards/frontier_coverage_1": 0.10427813678979873,
"rewards/frontier_coverage_10": 0.10427813678979873,
"rewards/frontier_coverage_15": 0.09994309544563293,
"rewards/frontier_coverage_20": 0.07507269382476807,
"rewards/frontier_coverage_25": 0.05625101327896118,
"rewards/frontier_coverage_5": 0.10427813678979873,
"rewards/frontier_ece_reward": 0.0028485337272286413,
"rewards/frontier_entropy_batch_reward": -0.1892842948436737,
"signal/accuracy_reward/centered_abs_mean": 0.097454833984375,
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
"signal/accuracy_reward/group_std_mean": 0.12492033690214158,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487274169921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0487274169921875,
"signal/advantage_abs_mean": 0.07159559726715088,
"signal/advantage_pre_scale_abs_mean": 0.07159559726715088,
"signal/advantage_pre_scale_std": 0.10877462178468704,
"signal/advantage_std": 0.10877462178468704,
"signal/brier_reward/centered_abs_mean": 0.11685294806957244,
"signal/brier_reward/group_bin_occupancy": 0.8484375,
"signal/brier_reward/group_std_mean": 0.14845768213272095,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014606618508696555,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014606618508696555,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027471881825476886,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.741015625,
"signal/frontier_aurc_reward/group_std_mean": 0.004431968554854393,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.917466576443985e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.917466576443985e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15529634058475494,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_1/group_std_mean": 0.1962975323200226,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027798044495284556,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027798044495284556,
"signal/frontier_coverage_10/centered_abs_mean": 0.15529634058475494,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_10/group_std_mean": 0.1962975323200226,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027798044495284556,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027798044495284556,
"signal/frontier_coverage_15/centered_abs_mean": 0.14664601981639863,
"signal/frontier_coverage_15/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_15/group_std_mean": 0.1854826033115387,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002624963456764817,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002624963456764817,
"signal/frontier_coverage_20/centered_abs_mean": 0.09293387830257416,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87421875,
"signal/frontier_coverage_20/group_std_mean": 0.11852222084999084,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016635163454338908,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016635163454338908,
"signal/frontier_coverage_25/centered_abs_mean": 0.0600375160574913,
"signal/frontier_coverage_25/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_25/group_std_mean": 0.07639677226543426,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010746715241111815,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010746715241111815,
"signal/frontier_coverage_5/centered_abs_mean": 0.15529634058475494,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_5/group_std_mean": 0.1962975323200226,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027798044495284556,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027798044495284556,
"signal/frontier_ece_reward/centered_abs_mean": 0.00405830298550427,
"signal/frontier_ece_reward/group_bin_occupancy": 0.619140625,
"signal/frontier_ece_reward/group_std_mean": 0.005047469865530729,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005072878731880337,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005072878731880337,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2611878842115402,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.333402281999588,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032648485526442526,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032648485526442526,
"step": 230
},
{
"calibration/aurc": 0.268875945135634,
"calibration/batch_distribution_entropy": 0.9749735442052577,
"calibration/batch_entropy_100bins": 0.9643205175783025,
"calibration/batch_entropy_10bins": 0.9749735442052577,
"calibration/batch_entropy_50bins": 0.9732517019604874,
"calibration/batch_uniqueness": 0.9509182219542363,
"calibration/buffer_distribution_entropy": 0.999202156889033,
"calibration/buffer_entropy_100bins": 0.9992126409759396,
"calibration/buffer_entropy_10bins": 0.999202156889033,
"calibration/buffer_entropy_50bins": 0.999297391738472,
"calibration/confidence_entropy": 0.4768481504008153,
"calibration/coverage@0%": 0.03398819716242661,
"calibration/coverage@1%": 0.03398819716242661,
"calibration/coverage@10%": 0.09805069716242662,
"calibration/coverage@15%": 0.2628944471624266,
"calibration/coverage@20%": 0.36172333659491196,
"calibration/coverage@25%": 0.5231508378180039,
"calibration/coverage@30%": 0.6325824058219178,
"calibration/coverage@5%": 0.05586319716242662,
"calibration/ece": 0.1138435006980445,
"calibration/mean_confidence": 0.4696731800362023,
"calibration/prompt_uniqueness": 0.8447302207986473,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 432.0,
"completions/max_terminated_length": 432.0,
"completions/mean_length": 196.7138671875,
"completions/mean_terminated_length": 196.7138671875,
"completions/min_length": 107.6,
"completions/min_terminated_length": 107.6,
"epoch": 0.752,
"grad_norm": 0.0009408697951585054,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 801777654.0,
"reward": 0.8604554295539856,
"reward_std": 0.08923238068819046,
"rewards/accuracy_reward": 0.54912109375,
"rewards/brier_reward": 0.8006747007369995,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0029859797097742556,
"rewards/frontier_coverage_1": 0.11275802329182624,
"rewards/frontier_coverage_10": 0.11275802329182624,
"rewards/frontier_coverage_15": 0.10675515756011009,
"rewards/frontier_coverage_20": 0.07440133690834046,
"rewards/frontier_coverage_25": 0.05188974887132645,
"rewards/frontier_coverage_5": 0.11275802329182624,
"rewards/frontier_ece_reward": 0.002763616549782455,
"rewards/frontier_entropy_batch_reward": -0.19688346982002258,
"signal/accuracy_reward/centered_abs_mean": 0.090399169921875,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.12397283762693405,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451995849609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451995849609375,
"signal/advantage_abs_mean": 0.06917839050292969,
"signal/advantage_pre_scale_abs_mean": 0.06917839050292969,
"signal/advantage_pre_scale_std": 0.10713021010160446,
"signal/advantage_std": 0.10713021010160446,
"signal/brier_reward/centered_abs_mean": 0.11189695447683334,
"signal/brier_reward/group_bin_occupancy": 0.845703125,
"signal/brier_reward/group_std_mean": 0.1444198101758957,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013987119309604168,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013987119309604168,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002711809379979968,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875,
"signal/frontier_aurc_reward/group_std_mean": 0.004320217343047261,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.854138751397841e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.854138751397841e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14956962913274766,
"signal/frontier_coverage_1/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_1/group_std_mean": 0.19387493133544922,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002677296195179224,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002677296195179224,
"signal/frontier_coverage_10/centered_abs_mean": 0.14956962913274766,
"signal/frontier_coverage_10/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_10/group_std_mean": 0.19387493133544922,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002677296195179224,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002677296195179224,
"signal/frontier_coverage_15/centered_abs_mean": 0.14117977619171143,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_15/group_std_mean": 0.18318403959274293,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002527117915451527,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002527117915451527,
"signal/frontier_coverage_20/centered_abs_mean": 0.08791815936565399,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_20/group_std_mean": 0.11480976194143296,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001573735009878874,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001573735009878874,
"signal/frontier_coverage_25/centered_abs_mean": 0.05685350224375725,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923828125,
"signal/frontier_coverage_25/group_std_mean": 0.07329353988170624,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010176776675507426,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010176776675507426,
"signal/frontier_coverage_5/centered_abs_mean": 0.14956962913274766,
"signal/frontier_coverage_5/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_5/group_std_mean": 0.19387493133544922,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002677296195179224,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002677296195179224,
"signal/frontier_ece_reward/centered_abs_mean": 0.0038073719944804905,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6296875,
"signal/frontier_ece_reward/group_std_mean": 0.004847258795052767,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004759214993100613,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004759214993100613,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709112524986267,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744921875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3456527829170227,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033863906562328336,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033863906562328336,
"step": 235
},
{
"calibration/aurc": 0.2740796036833044,
"calibration/batch_distribution_entropy": 0.9818428608553773,
"calibration/batch_entropy_100bins": 0.9719007710164602,
"calibration/batch_entropy_10bins": 0.9818428608553773,
"calibration/batch_entropy_50bins": 0.9787606167041576,
"calibration/batch_uniqueness": 0.9538522404981341,
"calibration/buffer_distribution_entropy": 0.9993119692396348,
"calibration/buffer_entropy_100bins": 0.9992856717754369,
"calibration/buffer_entropy_10bins": 0.9993119692396348,
"calibration/buffer_entropy_50bins": 0.9993856639865714,
"calibration/confidence_entropy": 0.5110251790731171,
"calibration/coverage@0%": 0.07266542318982387,
"calibration/coverage@1%": 0.07305604818982388,
"calibration/coverage@10%": 0.23283543297455972,
"calibration/coverage@15%": 0.31722342832681016,
"calibration/coverage@20%": 0.386768438111546,
"calibration/coverage@25%": 0.4594606164383562,
"calibration/coverage@30%": 0.5407473091976517,
"calibration/coverage@5%": 0.17580112524461838,
"calibration/ece": 0.1521925787277262,
"calibration/mean_confidence": 0.4857958555314106,
"calibration/prompt_uniqueness": 0.859637178069719,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 676.6,
"completions/max_terminated_length": 469.4,
"completions/mean_length": 211.4650390625,
"completions/mean_terminated_length": 211.33624572753905,
"completions/min_length": 108.2,
"completions/min_terminated_length": 108.2,
"epoch": 0.768,
"grad_norm": 0.0010750554502010345,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 818875760.0,
"reward": 0.8467617988586426,
"reward_std": 0.0898010030388832,
"rewards/accuracy_reward": 0.51259765625,
"rewards/brier_reward": 0.8113283753395081,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002595884189940989,
"rewards/frontier_coverage_1": 0.1418531656265259,
"rewards/frontier_coverage_10": 0.1418424904346466,
"rewards/frontier_coverage_15": 0.1365887075662613,
"rewards/frontier_coverage_20": 0.09369135200977326,
"rewards/frontier_coverage_25": 0.060536155849695204,
"rewards/frontier_coverage_5": 0.1418531656265259,
"rewards/frontier_ece_reward": 0.002913234336301684,
"rewards/frontier_entropy_batch_reward": -0.19235891699790955,
"signal/accuracy_reward/centered_abs_mean": 0.093255615234375,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.12222997695207596,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0466278076171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0466278076171875,
"signal/advantage_abs_mean": 0.0705685243010521,
"signal/advantage_pre_scale_abs_mean": 0.0705685243010521,
"signal/advantage_pre_scale_std": 0.1090763971209526,
"signal/advantage_std": 0.1090763971209526,
"signal/brier_reward/centered_abs_mean": 0.11311222910881043,
"signal/brier_reward/group_bin_occupancy": 0.855078125,
"signal/brier_reward/group_std_mean": 0.14575394093990326,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014139028638601303,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014139028638601303,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022981606656685472,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625,
"signal/frontier_aurc_reward/group_std_mean": 0.0038101823534816503,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.113707545911893e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.113707545911893e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15940954387187958,
"signal/frontier_coverage_1/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_1/group_std_mean": 0.20271311998367308,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002853430714458227,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002853430714458227,
"signal/frontier_coverage_10/centered_abs_mean": 0.15939579010009766,
"signal/frontier_coverage_10/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_10/group_std_mean": 0.20269620716571807,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002853184659034014,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002853184659034014,
"signal/frontier_coverage_15/centered_abs_mean": 0.14816523492336273,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_15/group_std_mean": 0.18849847018718718,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026521575171500446,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026521575171500446,
"signal/frontier_coverage_20/centered_abs_mean": 0.09317153096199035,
"signal/frontier_coverage_20/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_20/group_std_mean": 0.11924822032451629,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016677704174071551,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016677704174071551,
"signal/frontier_coverage_25/centered_abs_mean": 0.05879691541194916,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9234375,
"signal/frontier_coverage_25/group_std_mean": 0.07511676400899887,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010524647310376166,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010524647310376166,
"signal/frontier_coverage_5/centered_abs_mean": 0.15940954387187958,
"signal/frontier_coverage_5/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_5/group_std_mean": 0.20271311998367308,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002853430714458227,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002853430714458227,
"signal/frontier_ece_reward/centered_abs_mean": 0.003463554894551635,
"signal/frontier_ece_reward/group_bin_occupancy": 0.62421875,
"signal/frontier_ece_reward/group_std_mean": 0.004399275593459606,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043294436181895435,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043294436181895435,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25717605352401735,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33379308581352235,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03214700669050217,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03214700669050217,
"step": 240
},
{
"calibration/aurc": 0.3350168970245671,
"calibration/batch_distribution_entropy": 0.979507349012119,
"calibration/batch_entropy_100bins": 0.9683820677767004,
"calibration/batch_entropy_10bins": 0.979507349012119,
"calibration/batch_entropy_50bins": 0.9770155344856404,
"calibration/batch_uniqueness": 0.9531518665621839,
"calibration/buffer_distribution_entropy": 0.9993121305283615,
"calibration/buffer_entropy_100bins": 0.999273233968751,
"calibration/buffer_entropy_10bins": 0.9993121305283615,
"calibration/buffer_entropy_50bins": 0.9993773005255697,
"calibration/confidence_entropy": 0.4675455419188176,
"calibration/coverage@0%": 0.02188035102739726,
"calibration/coverage@1%": 0.04570847602739726,
"calibration/coverage@10%": 0.18681353962818004,
"calibration/coverage@15%": 0.24155531433463798,
"calibration/coverage@20%": 0.30644569471624267,
"calibration/coverage@25%": 0.4002813111545988,
"calibration/coverage@30%": 0.45930390777886493,
"calibration/coverage@5%": 0.11211472602739728,
"calibration/ece": 0.16025081061134686,
"calibration/mean_confidence": 0.5276171519486046,
"calibration/prompt_uniqueness": 0.8416884470928719,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 730.6,
"completions/max_terminated_length": 561.8,
"completions/mean_length": 217.26181640625,
"completions/mean_terminated_length": 217.13308410644532,
"completions/min_length": 111.6,
"completions/min_terminated_length": 111.6,
"epoch": 0.784,
"grad_norm": 0.0007616052753292024,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 836274889.0,
"reward": 0.8599894285202027,
"reward_std": 0.08833477348089218,
"rewards/accuracy_reward": 0.55615234375,
"rewards/brier_reward": 0.7853159546852112,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0031998661812394857,
"rewards/frontier_coverage_1": 0.08971645757555961,
"rewards/frontier_coverage_10": 0.08970820307731628,
"rewards/frontier_coverage_15": 0.08065339028835297,
"rewards/frontier_coverage_20": 0.06030083037912846,
"rewards/frontier_coverage_25": 0.04737272821366787,
"rewards/frontier_coverage_5": 0.08971645757555961,
"rewards/frontier_ece_reward": 0.0020965512841939924,
"rewards/frontier_entropy_batch_reward": -0.19676691591739653,
"signal/accuracy_reward/centered_abs_mean": 0.091058349609375,
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
"signal/accuracy_reward/group_std_mean": 0.12126990556716918,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455291748046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0455291748046875,
"signal/advantage_abs_mean": 0.06926657930016518,
"signal/advantage_pre_scale_abs_mean": 0.06926657930016518,
"signal/advantage_pre_scale_std": 0.10528580248355865,
"signal/advantage_std": 0.10528580248355865,
"signal/brier_reward/centered_abs_mean": 0.11832668632268906,
"signal/brier_reward/group_bin_occupancy": 0.83984375,
"signal/brier_reward/group_std_mean": 0.15334346294403076,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014790835790336132,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014790835790336132,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002936989581212401,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.726171875,
"signal/frontier_aurc_reward/group_std_mean": 0.004758535791188479,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.257211159914732e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.257211159914732e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15365730226039886,
"signal/frontier_coverage_1/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_1/group_std_mean": 0.19880372881889344,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002750465599820018,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002750465599820018,
"signal/frontier_coverage_10/centered_abs_mean": 0.15364324450492858,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_10/group_std_mean": 0.19878601729869844,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002750213909894228,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002750213909894228,
"signal/frontier_coverage_15/centered_abs_mean": 0.1363199084997177,
"signal/frontier_coverage_15/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_15/group_std_mean": 0.1768506795167923,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024401261936873196,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024401261936873196,
"signal/frontier_coverage_20/centered_abs_mean": 0.08445133566856385,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_20/group_std_mean": 0.11020771414041519,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001511678844690323,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001511678844690323,
"signal/frontier_coverage_25/centered_abs_mean": 0.05710015743970871,
"signal/frontier_coverage_25/group_bin_occupancy": 0.927734375,
"signal/frontier_coverage_25/group_std_mean": 0.07321809381246566,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001022092835046351,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001022092835046351,
"signal/frontier_coverage_5/centered_abs_mean": 0.15365730226039886,
"signal/frontier_coverage_5/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_5/group_std_mean": 0.19880372881889344,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002750465599820018,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002750465599820018,
"signal/frontier_ece_reward/centered_abs_mean": 0.003645011968910694,
"signal/frontier_ece_reward/group_bin_occupancy": 0.624609375,
"signal/frontier_ece_reward/group_std_mean": 0.004628013540059328,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045562649611383674,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045562649611383674,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2689265012741089,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34439175128936766,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03361581265926361,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03361581265926361,
"step": 245
},
{
"calibration/aurc": 0.2089238888139648,
"calibration/batch_distribution_entropy": 0.9783443187041841,
"calibration/batch_entropy_100bins": 0.9692957828700483,
"calibration/batch_entropy_10bins": 0.9783443187041841,
"calibration/batch_entropy_50bins": 0.9749560863340105,
"calibration/batch_uniqueness": 0.9527830701487154,
"calibration/buffer_distribution_entropy": 0.9992285993286943,
"calibration/buffer_entropy_100bins": 0.9992151365527878,
"calibration/buffer_entropy_10bins": 0.9992285993286943,
"calibration/buffer_entropy_50bins": 0.9992907312602902,
"calibration/confidence_entropy": 0.4936001315000181,
"calibration/coverage@0%": 0.04140854329745597,
"calibration/coverage@1%": 0.04140854329745597,
"calibration/coverage@10%": 0.3380305161448141,
"calibration/coverage@15%": 0.48029293052837574,
"calibration/coverage@20%": 0.5584530944227006,
"calibration/coverage@25%": 0.6248937438845401,
"calibration/coverage@30%": 0.7159460616438356,
"calibration/coverage@5%": 0.11017153864970645,
"calibration/ece": 0.11535332939116949,
"calibration/mean_confidence": 0.48316137568507056,
"calibration/prompt_uniqueness": 0.8392774758552288,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1050.4,
"completions/max_terminated_length": 630.8,
"completions/mean_length": 223.39736328125,
"completions/mean_terminated_length": 223.14104309082032,
"completions/min_length": 114.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.8,
"grad_norm": 0.0011625770712271333,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 853573038.0,
"reward": 0.8727638602256775,
"reward_std": 0.08851251155138015,
"rewards/accuracy_reward": 0.57861328125,
"rewards/brier_reward": 0.8088589787483216,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002810212690383196,
"rewards/frontier_coverage_1": 0.09647311270236969,
"rewards/frontier_coverage_10": 0.09640982151031494,
"rewards/frontier_coverage_15": 0.08719095587730408,
"rewards/frontier_coverage_20": 0.06359865590929985,
"rewards/frontier_coverage_25": 0.049777823686599734,
"rewards/frontier_coverage_5": 0.09647311270236969,
"rewards/frontier_ece_reward": 0.00226962142623961,
"rewards/frontier_entropy_batch_reward": -0.21244405210018158,
"signal/accuracy_reward/centered_abs_mean": 0.089971923828125,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.11883124858140945,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0449859619140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0449859619140625,
"signal/advantage_abs_mean": 0.06932459995150567,
"signal/advantage_pre_scale_abs_mean": 0.06932459995150567,
"signal/advantage_pre_scale_std": 0.10704608410596847,
"signal/advantage_std": 0.10704608410596847,
"signal/brier_reward/centered_abs_mean": 0.10898203402757645,
"signal/brier_reward/group_bin_occupancy": 0.8546875,
"signal/brier_reward/group_std_mean": 0.13880451619625092,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013622754253447056,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013622754253447056,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026166523108258843,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719140625,
"signal/frontier_aurc_reward/group_std_mean": 0.0043006549589335915,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.683807346737012e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.683807346737012e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14726514369249344,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.18634527921676636,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026360460091382266,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026360460091382266,
"signal/frontier_coverage_10/centered_abs_mean": 0.14715069383382798,
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
"signal/frontier_coverage_10/group_std_mean": 0.18619897961616516,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002633997332304716,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002633997332304716,
"signal/frontier_coverage_15/centered_abs_mean": 0.12722482085227965,
"signal/frontier_coverage_15/group_bin_occupancy": 0.867578125,
"signal/frontier_coverage_15/group_std_mean": 0.1613352745771408,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002277324162423611,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002277324162423611,
"signal/frontier_coverage_20/centered_abs_mean": 0.07807688787579536,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_20/group_std_mean": 0.10010033547878265,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013975762762129308,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013975762762129308,
"signal/frontier_coverage_25/centered_abs_mean": 0.05350769758224487,
"signal/frontier_coverage_25/group_bin_occupancy": 0.937109375,
"signal/frontier_coverage_25/group_std_mean": 0.06782967150211335,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009577877586707473,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009577877586707473,
"signal/frontier_coverage_5/centered_abs_mean": 0.14726514369249344,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.18634527921676636,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026360460091382266,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026360460091382266,
"signal/frontier_ece_reward/centered_abs_mean": 0.0036554763093590735,
"signal/frontier_ece_reward/group_bin_occupancy": 0.608203125,
"signal/frontier_ece_reward/group_std_mean": 0.004595436621457338,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004569345386698842,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004569345386698842,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27770012617111206,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.727734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35029610991477966,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03471251577138901,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03471251577138901,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.44251735983763185,
"eval_calibration/batch_distribution_entropy": 0.9223826154717725,
"eval_calibration/batch_entropy_100bins": 0.7052278361140918,
"eval_calibration/batch_entropy_10bins": 0.9223826154717725,
"eval_calibration/batch_entropy_50bins": 0.7855204844461221,
"eval_calibration/batch_uniqueness": 0.8955078125,
"eval_calibration/buffer_distribution_entropy": 0.9993625308716999,
"eval_calibration/buffer_entropy_100bins": 0.9992947637811891,
"eval_calibration/buffer_entropy_10bins": 0.9993625308716999,
"eval_calibration/buffer_entropy_50bins": 0.9993754687374323,
"eval_calibration/confidence_entropy": 0.47403639371622897,
"eval_calibration/coverage@0%": 0.0546875,
"eval_calibration/coverage@1%": 0.0546875,
"eval_calibration/coverage@10%": 0.0546875,
"eval_calibration/coverage@15%": 0.0703125,
"eval_calibration/coverage@20%": 0.140625,
"eval_calibration/coverage@25%": 0.203125,
"eval_calibration/coverage@30%": 0.28125,
"eval_calibration/coverage@5%": 0.0546875,
"eval_calibration/ece": 0.22032250116355573,
"eval_calibration/mean_confidence": 0.448963486796308,
"eval_calibration/prompt_uniqueness": 0.8955078125,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 445.0,
"eval_completions/max_terminated_length": 445.0,
"eval_completions/mean_length": 228.59785842895508,
"eval_completions/mean_terminated_length": 228.59785842895508,
"eval_completions/min_length": 137.5,
"eval_completions/min_terminated_length": 137.5,
"eval_loss": 0.0,
"eval_num_tokens": 853573038.0,
"eval_reward": 0.7110898196697235,
"eval_reward_std": 0.22806879505515099,
"eval_rewards/accuracy_reward": 0.4375,
"eval_rewards/brier_reward": 0.8064036816358566,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0034677567309699953,
"eval_rewards/frontier_coverage_1": 0.19150707125663757,
"eval_rewards/frontier_coverage_10": 0.19115351140499115,
"eval_rewards/frontier_coverage_15": 0.1673499085009098,
"eval_rewards/frontier_coverage_20": 0.1066279262304306,
"eval_rewards/frontier_coverage_25": 0.0597064346075058,
"eval_rewards/frontier_coverage_5": 0.19150707125663757,
"eval_rewards/frontier_ece_reward": 0.002807055599987507,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 22.2722,
"eval_samples_per_second": 22.45,
"eval_signal/accuracy_reward/centered_abs_mean": 0.476806640625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49588072299957275,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2384033203125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2384033203125,
"eval_signal/advantage_abs_mean": 0.21377598494291306,
"eval_signal/advantage_pre_scale_abs_mean": 0.21377598494291306,
"eval_signal/advantage_pre_scale_std": 0.225502610206604,
"eval_signal/advantage_std": 0.225502610206604,
"eval_signal/brier_reward/centered_abs_mean": 0.1754986234009266,
"eval_signal/brier_reward/group_bin_occupancy": 0.8359375,
"eval_signal/brier_reward/group_std_mean": 0.2285812497138977,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021937327925115824,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021937327925115824,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004540800233371556,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6484375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.00849473278503865,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.128032095555682e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.128032095555682e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.35326529294252396,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.42462950199842453,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006323448498733342,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006323448498733342,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35248684138059616,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.42372994869947433,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006309514516033232,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006309514516033232,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.29927831143140793,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_15/group_std_mean": 0.3624297082424164,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005357081652618945,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005357081652618945,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.172193493694067,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.921875,
"eval_signal/frontier_coverage_20/group_std_mean": 0.21549956128001213,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030822635162621737,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030822635162621737,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08839073590934277,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11184324324131012,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015821942070033401,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015821942070033401,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.35326529294252396,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.42462950199842453,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006323448498733342,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006323448498733342,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0050687192706391215,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.890625,
"eval_signal/frontier_ece_reward/group_std_mean": 0.006167635438032448,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006335899088298902,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006335899088298902,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.18,
"step": 250
},
{
"calibration/aurc": 0.2352606782149013,
"calibration/batch_distribution_entropy": 0.9709767853004306,
"calibration/batch_entropy_100bins": 0.9633920257815575,
"calibration/batch_entropy_10bins": 0.9709767853004306,
"calibration/batch_entropy_50bins": 0.9695731037391886,
"calibration/batch_uniqueness": 0.9511383056640625,
"calibration/buffer_distribution_entropy": 0.9993028558668,
"calibration/buffer_entropy_100bins": 0.999259175252465,
"calibration/buffer_entropy_10bins": 0.9993028558668,
"calibration/buffer_entropy_50bins": 0.9993303494209318,
"calibration/confidence_entropy": 0.4811342741701587,
"calibration/coverage@0%": 0.001953125,
"calibration/coverage@1%": 0.001953125,
"calibration/coverage@10%": 0.124609375,
"calibration/coverage@15%": 0.19921875,
"calibration/coverage@20%": 0.509765625,
"calibration/coverage@25%": 0.679296875,
"calibration/coverage@30%": 0.790234375,
"calibration/coverage@5%": 0.06015625,
"calibration/ece": 0.14137887296716461,
"calibration/mean_confidence": 0.5129679753761538,
"calibration/prompt_uniqueness": 0.846533203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 664.6,
"completions/max_terminated_length": 664.6,
"completions/mean_length": 223.664453125,
"completions/mean_terminated_length": 223.664453125,
"completions/min_length": 110.6,
"completions/min_terminated_length": 110.6,
"epoch": 0.816,
"grad_norm": 0.0016217977972701192,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 870962530.0,
"reward": 0.8748025059700012,
"reward_std": 0.09103592932224273,
"rewards/accuracy_reward": 0.586328125,
"rewards/brier_reward": 0.7861274123191834,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002859151270240545,
"rewards/frontier_coverage_1": 0.06481336802244186,
"rewards/frontier_coverage_10": 0.06481285095214843,
"rewards/frontier_coverage_15": 0.06343448236584663,
"rewards/frontier_coverage_20": 0.049446874484419825,
"rewards/frontier_coverage_25": 0.04227612838149071,
"rewards/frontier_coverage_5": 0.06481328457593918,
"rewards/frontier_ece_reward": 0.001674002129584551,
"rewards/frontier_entropy_batch_reward": -0.1839560568332672,
"signal/accuracy_reward/centered_abs_mean": 0.1002685546875,
"signal/accuracy_reward/group_bin_occupancy": 0.170703125,
"signal/accuracy_reward/group_std_mean": 0.13107529729604722,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05013427734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05013427734375,
"signal/advantage_abs_mean": 0.07067288607358932,
"signal/advantage_pre_scale_abs_mean": 0.07067288607358932,
"signal/advantage_pre_scale_std": 0.10996713936328888,
"signal/advantage_std": 0.10996713936328888,
"signal/brier_reward/centered_abs_mean": 0.12410824000835419,
"signal/brier_reward/group_bin_occupancy": 0.863671875,
"signal/brier_reward/group_std_mean": 0.1572820007801056,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015513530001044273,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015513530001044273,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027249534614384174,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73359375,
"signal/frontier_aurc_reward/group_std_mean": 0.004633441660553217,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.877666797256097e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.877666797256097e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16365497708320617,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_1/group_std_mean": 0.20742543637752534,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029294240288436414,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029294240288436414,
"signal/frontier_coverage_10/centered_abs_mean": 0.16364520490169526,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_10/group_std_mean": 0.2074132442474365,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002929249033331871,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002929249033331871,
"signal/frontier_coverage_15/centered_abs_mean": 0.14128550589084626,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_15/group_std_mean": 0.1796106904745102,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002529010409489274,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002529010409489274,
"signal/frontier_coverage_20/centered_abs_mean": 0.085682213306427,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_20/group_std_mean": 0.11009515970945358,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015337116550654174,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015337116550654174,
"signal/frontier_coverage_25/centered_abs_mean": 0.057272438704967496,
"signal/frontier_coverage_25/group_bin_occupancy": 0.927734375,
"signal/frontier_coverage_25/group_std_mean": 0.07305631190538406,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001025176583789289,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001025176583789289,
"signal/frontier_coverage_5/centered_abs_mean": 0.16365209817886353,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_5/group_std_mean": 0.2074216663837433,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029293723870068788,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029293723870068788,
"signal/frontier_ece_reward/centered_abs_mean": 0.0036711919121444224,
"signal/frontier_ece_reward/group_bin_occupancy": 0.631640625,
"signal/frontier_ece_reward/group_std_mean": 0.004636763595044613,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004588989890180528,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004588989890180528,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2510399729013443,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.731640625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32446773648262023,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031379996612668035,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031379996612668035,
"step": 255
},
{
"calibration/aurc": 0.2889344001133566,
"calibration/batch_distribution_entropy": 0.9739075456970238,
"calibration/batch_entropy_100bins": 0.9649792933196213,
"calibration/batch_entropy_10bins": 0.9739075456970238,
"calibration/batch_entropy_50bins": 0.9740646917655301,
"calibration/batch_uniqueness": 0.9515716552734375,
"calibration/buffer_distribution_entropy": 0.999235170401582,
"calibration/buffer_entropy_100bins": 0.9992304590039129,
"calibration/buffer_entropy_10bins": 0.999235170401582,
"calibration/buffer_entropy_50bins": 0.9992710439676749,
"calibration/confidence_entropy": 0.48948251997062514,
"calibration/coverage@0%": 0.0328125,
"calibration/coverage@1%": 0.0328125,
"calibration/coverage@10%": 0.243359375,
"calibration/coverage@15%": 0.329296875,
"calibration/coverage@20%": 0.369921875,
"calibration/coverage@25%": 0.431640625,
"calibration/coverage@30%": 0.516796875,
"calibration/coverage@5%": 0.16796875,
"calibration/ece": 0.11959374788247965,
"calibration/mean_confidence": 0.4745517848151627,
"calibration/prompt_uniqueness": 0.841357421875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 473.2,
"completions/max_terminated_length": 473.2,
"completions/mean_length": 232.755078125,
"completions/mean_terminated_length": 232.755078125,
"completions/min_length": 112.2,
"completions/min_terminated_length": 112.2,
"epoch": 0.832,
"grad_norm": 0.0008559515117667615,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 888354294.0,
"reward": 0.8606176733970642,
"reward_std": 0.08793712109327316,
"rewards/accuracy_reward": 0.54833984375,
"rewards/brier_reward": 0.8110590815544129,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0024320174707099795,
"rewards/frontier_coverage_1": 0.11919757276773453,
"rewards/frontier_coverage_10": 0.11918442994356156,
"rewards/frontier_coverage_15": 0.10204497873783111,
"rewards/frontier_coverage_20": 0.07099157050251961,
"rewards/frontier_coverage_25": 0.05516631901264191,
"rewards/frontier_coverage_5": 0.11918965280056,
"rewards/frontier_ece_reward": 0.00244655329734087,
"rewards/frontier_entropy_batch_reward": -0.20545812249183654,
"signal/accuracy_reward/centered_abs_mean": 0.091961669921875,
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
"signal/accuracy_reward/group_std_mean": 0.1235174298286438,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459808349609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459808349609375,
"signal/advantage_abs_mean": 0.06834444552659988,
"signal/advantage_pre_scale_abs_mean": 0.06834444552659988,
"signal/advantage_pre_scale_std": 0.10633230209350586,
"signal/advantage_std": 0.10633230209350586,
"signal/brier_reward/centered_abs_mean": 0.10559364557266235,
"signal/brier_reward/group_bin_occupancy": 0.8421875,
"signal/brier_reward/group_std_mean": 0.13621910512447358,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013199205696582793,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013199205696582793,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00220845362637192,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.730078125,
"signal/frontier_aurc_reward/group_std_mean": 0.00363809815607965,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.953131890739314e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.953131890739314e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1493607133626938,
"signal/frontier_coverage_1/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_1/group_std_mean": 0.1920451521873474,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026735567953437567,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026735567953437567,
"signal/frontier_coverage_10/centered_abs_mean": 0.1493442475795746,
"signal/frontier_coverage_10/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_10/group_std_mean": 0.1920243412256241,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026732619386166333,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026732619386166333,
"signal/frontier_coverage_15/centered_abs_mean": 0.12451920211315155,
"signal/frontier_coverage_15/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_15/group_std_mean": 0.16057583391666413,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002228893619030714,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002228893619030714,
"signal/frontier_coverage_20/centered_abs_mean": 0.07849853485822678,
"signal/frontier_coverage_20/group_bin_occupancy": 0.89375,
"signal/frontier_coverage_20/group_std_mean": 0.10143940895795822,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014051236677914858,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014051236677914858,
"signal/frontier_coverage_25/centered_abs_mean": 0.05491392761468887,
"signal/frontier_coverage_25/group_bin_occupancy": 0.93203125,
"signal/frontier_coverage_25/group_std_mean": 0.06983330100774765,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009829592425376176,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009829592425376176,
"signal/frontier_coverage_5/centered_abs_mean": 0.14935098588466644,
"signal/frontier_coverage_5/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_5/group_std_mean": 0.19203279614448548,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026733824983239175,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026733824983239175,
"signal/frontier_ece_reward/centered_abs_mean": 0.003441282361745834,
"signal/frontier_ece_reward/group_bin_occupancy": 0.59765625,
"signal/frontier_ece_reward/group_std_mean": 0.004348612949252128,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043016029521822927,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043016029521822927,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2689357101917267,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726953125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3395772337913513,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033616963773965836,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033616963773965836,
"step": 260
},
{
"calibration/aurc": 0.3178738194955885,
"calibration/batch_distribution_entropy": 0.9734277851333974,
"calibration/batch_entropy_100bins": 0.9655052615628407,
"calibration/batch_entropy_10bins": 0.9734277851333974,
"calibration/batch_entropy_50bins": 0.9738673939128841,
"calibration/batch_uniqueness": 0.9522003173828125,
"calibration/buffer_distribution_entropy": 0.9992495195467482,
"calibration/buffer_entropy_100bins": 0.9992220261544034,
"calibration/buffer_entropy_10bins": 0.9992495195467482,
"calibration/buffer_entropy_50bins": 0.9992710813705669,
"calibration/confidence_entropy": 0.4941748172082874,
"calibration/coverage@0%": 0.021875,
"calibration/coverage@1%": 0.021875,
"calibration/coverage@10%": 0.169140625,
"calibration/coverage@15%": 0.223828125,
"calibration/coverage@20%": 0.397265625,
"calibration/coverage@25%": 0.471875,
"calibration/coverage@30%": 0.56328125,
"calibration/coverage@5%": 0.075390625,
"calibration/ece": 0.13729417046243933,
"calibration/mean_confidence": 0.5503775516014088,
"calibration/prompt_uniqueness": 0.85205078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 760.8,
"completions/max_terminated_length": 552.2,
"completions/mean_length": 240.17587890625,
"completions/mean_terminated_length": 240.0502502441406,
"completions/min_length": 121.4,
"completions/min_terminated_length": 121.4,
"epoch": 0.848,
"grad_norm": 0.0008649929077364504,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 905828063.0,
"reward": 0.8515474200248718,
"reward_std": 0.08773799389600753,
"rewards/accuracy_reward": 0.53330078125,
"rewards/brier_reward": 0.8009130716323852,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002978764148429036,
"rewards/frontier_coverage_1": 0.11573868244886398,
"rewards/frontier_coverage_10": 0.11573390364646911,
"rewards/frontier_coverage_15": 0.09874581471085549,
"rewards/frontier_coverage_20": 0.06614762619137764,
"rewards/frontier_coverage_25": 0.04877230152487755,
"rewards/frontier_coverage_5": 0.11573788076639176,
"rewards/frontier_ece_reward": 0.0023759857984259726,
"rewards/frontier_entropy_batch_reward": -0.20361319780349732,
"signal/accuracy_reward/centered_abs_mean": 0.084564208984375,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.1135980024933815,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422821044921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422821044921875,
"signal/advantage_abs_mean": 0.06841744184494018,
"signal/advantage_pre_scale_abs_mean": 0.06841744184494018,
"signal/advantage_pre_scale_std": 0.10599033236503601,
"signal/advantage_std": 0.10599033236503601,
"signal/brier_reward/centered_abs_mean": 0.11178396046161651,
"signal/brier_reward/group_bin_occupancy": 0.857421875,
"signal/brier_reward/group_std_mean": 0.14463868141174316,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013972995057702064,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013972995057702064,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027990068774670362,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719140625,
"signal/frontier_aurc_reward/group_std_mean": 0.00469839870929718,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.010222375858575e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.010222375858575e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14509033262729645,
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_1/group_std_mean": 0.18849168419837953,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025971168652176857,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025971168652176857,
"signal/frontier_coverage_10/centered_abs_mean": 0.14508127570152282,
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_10/group_std_mean": 0.188480207324028,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025969548150897026,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025969548150897026,
"signal/frontier_coverage_15/centered_abs_mean": 0.12304246425628662,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.16019290089607238,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002202460076659918,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002202460076659918,
"signal/frontier_coverage_20/centered_abs_mean": 0.07703937292098999,
"signal/frontier_coverage_20/group_bin_occupancy": 0.898046875,
"signal/frontier_coverage_20/group_std_mean": 0.10073128789663315,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013790046563372017,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013790046563372017,
"signal/frontier_coverage_25/centered_abs_mean": 0.05316209346055985,
"signal/frontier_coverage_25/group_bin_occupancy": 0.931640625,
"signal/frontier_coverage_25/group_std_mean": 0.06862208545207978,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009516014717519284,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009516014717519284,
"signal/frontier_coverage_5/centered_abs_mean": 0.14508905708789827,
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_5/group_std_mean": 0.18849003911018372,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002597094140946865,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002597094140946865,
"signal/frontier_ece_reward/centered_abs_mean": 0.003437171783298254,
"signal/frontier_ece_reward/group_bin_occupancy": 0.625390625,
"signal/frontier_ece_reward/group_std_mean": 0.004384188260883093,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042964647291228176,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042964647291228176,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27023649513721465,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3401765406131744,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03377956189215183,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03377956189215183,
"step": 265
},
{
"calibration/aurc": 0.2825008543621849,
"calibration/batch_distribution_entropy": 0.9662087974973617,
"calibration/batch_entropy_100bins": 0.96186889855781,
"calibration/batch_entropy_10bins": 0.9662087974973617,
"calibration/batch_entropy_50bins": 0.9688150730230692,
"calibration/batch_uniqueness": 0.9511749267578125,
"calibration/buffer_distribution_entropy": 0.9992175012534602,
"calibration/buffer_entropy_100bins": 0.9991897788432713,
"calibration/buffer_entropy_10bins": 0.9992175012534602,
"calibration/buffer_entropy_50bins": 0.9992416855129468,
"calibration/confidence_entropy": 0.49441546229639216,
"calibration/coverage@0%": 0.00625,
"calibration/coverage@1%": 0.00625,
"calibration/coverage@10%": 0.0859375,
"calibration/coverage@15%": 0.175,
"calibration/coverage@20%": 0.253125,
"calibration/coverage@25%": 0.408984375,
"calibration/coverage@30%": 0.52109375,
"calibration/coverage@5%": 0.020703125,
"calibration/ece": 0.13110279802041727,
"calibration/mean_confidence": 0.5927889569782829,
"calibration/prompt_uniqueness": 0.85537109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 952.0,
"completions/max_terminated_length": 568.8,
"completions/mean_length": 247.87822265625,
"completions/mean_terminated_length": 247.37486267089844,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.864,
"grad_norm": 0.001017643604427576,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 923353152.0,
"reward": 0.8750402927398682,
"reward_std": 0.09378763735294342,
"rewards/accuracy_reward": 0.59384765625,
"rewards/brier_reward": 0.7912804245948791,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0026999496389180423,
"rewards/frontier_coverage_1": 0.07068156786262988,
"rewards/frontier_coverage_10": 0.07068105041980743,
"rewards/frontier_coverage_15": 0.0655278891324997,
"rewards/frontier_coverage_20": 0.05093179382383824,
"rewards/frontier_coverage_25": 0.04501226842403412,
"rewards/frontier_coverage_5": 0.07068156786262988,
"rewards/frontier_ece_reward": 0.0017709420528262854,
"rewards/frontier_entropy_batch_reward": -0.21965786516666413,
"signal/accuracy_reward/centered_abs_mean": 0.095587158203125,
"signal/accuracy_reward/group_bin_occupancy": 0.171484375,
"signal/accuracy_reward/group_std_mean": 0.1286234974861145,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477935791015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0477935791015625,
"signal/advantage_abs_mean": 0.07281743288040161,
"signal/advantage_pre_scale_abs_mean": 0.07281743288040161,
"signal/advantage_pre_scale_std": 0.11024300009012222,
"signal/advantage_std": 0.11024300009012222,
"signal/brier_reward/centered_abs_mean": 0.11920353770256042,
"signal/brier_reward/group_bin_occupancy": 0.844921875,
"signal/brier_reward/group_std_mean": 0.15356789529323578,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014900442212820053,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014900442212820053,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026988452998921277,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125,
"signal/frontier_aurc_reward/group_std_mean": 0.004450180754065514,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8309330304618924e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8309330304618924e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15822483897209166,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_1/group_std_mean": 0.20437292754650116,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028322245460003616,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028322245460003616,
"signal/frontier_coverage_10/centered_abs_mean": 0.15821611285209655,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_10/group_std_mean": 0.2043617308139801,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002832068270072341,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002832068270072341,
"signal/frontier_coverage_15/centered_abs_mean": 0.13421038091182708,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
"signal/frontier_coverage_15/group_std_mean": 0.17385528981685638,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002402365766465664,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002402365766465664,
"signal/frontier_coverage_20/centered_abs_mean": 0.08285623341798783,
"signal/frontier_coverage_20/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_20/group_std_mean": 0.10784974545240403,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014831265201792122,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014831265201792122,
"signal/frontier_coverage_25/centered_abs_mean": 0.05798155665397644,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923828125,
"signal/frontier_coverage_25/group_std_mean": 0.07409553527832032,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010378698818385601,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010378698818385601,
"signal/frontier_coverage_5/centered_abs_mean": 0.15822483897209166,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_5/group_std_mean": 0.20437292754650116,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028322245460003616,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028322245460003616,
"signal/frontier_ece_reward/centered_abs_mean": 0.003631744394078851,
"signal/frontier_ece_reward/group_bin_occupancy": 0.634375,
"signal/frontier_ece_reward/group_std_mean": 0.004583617858588696,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045396804925985635,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045396804925985635,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28241761326789855,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3559215545654297,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03530220165848732,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03530220165848732,
"step": 270
},
{
"calibration/aurc": 0.3989955123694968,
"calibration/batch_distribution_entropy": 0.983939721229347,
"calibration/batch_entropy_100bins": 0.9701765594162302,
"calibration/batch_entropy_10bins": 0.983939721229347,
"calibration/batch_entropy_50bins": 0.9790045138186654,
"calibration/batch_uniqueness": 0.953621613094267,
"calibration/buffer_distribution_entropy": 0.9991278370535038,
"calibration/buffer_entropy_100bins": 0.9991747826000577,
"calibration/buffer_entropy_10bins": 0.9991278370535038,
"calibration/buffer_entropy_50bins": 0.9992235492506977,
"calibration/confidence_entropy": 0.4716516326426182,
"calibration/coverage@0%": 0.005865490459882583,
"calibration/coverage@1%": 0.005865490459882583,
"calibration/coverage@10%": 0.008605216487279844,
"calibration/coverage@15%": 0.0304955051369863,
"calibration/coverage@20%": 0.04614420254403131,
"calibration/coverage@25%": 0.1395272749510763,
"calibration/coverage@30%": 0.2842060604207436,
"calibration/coverage@5%": 0.005865490459882583,
"calibration/ece": 0.12940730029672529,
"calibration/mean_confidence": 0.49376591700142836,
"calibration/prompt_uniqueness": 0.8365943170362904,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1032.8,
"completions/max_terminated_length": 679.8,
"completions/mean_length": 250.778515625,
"completions/mean_terminated_length": 250.52763977050782,
"completions/min_length": 122.6,
"completions/min_terminated_length": 122.6,
"epoch": 0.88,
"grad_norm": 0.0010307779302820563,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 941068196.0,
"reward": 0.8426900625228881,
"reward_std": 0.09440464824438095,
"rewards/accuracy_reward": 0.51435546875,
"rewards/brier_reward": 0.7943570613861084,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003383295517414808,
"rewards/frontier_coverage_1": 0.13087365329265593,
"rewards/frontier_coverage_10": 0.13065045028924943,
"rewards/frontier_coverage_15": 0.11363004744052888,
"rewards/frontier_coverage_20": 0.07452845722436904,
"rewards/frontier_coverage_25": 0.05105185955762863,
"rewards/frontier_coverage_5": 0.13087365329265593,
"rewards/frontier_ece_reward": 0.002461729710921645,
"rewards/frontier_entropy_batch_reward": -0.20190061628818512,
"signal/accuracy_reward/centered_abs_mean": 0.103082275390625,
"signal/accuracy_reward/group_bin_occupancy": 0.174609375,
"signal/accuracy_reward/group_std_mean": 0.13669233918190002,
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515411376953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0515411376953125,
"signal/advantage_abs_mean": 0.07417571395635605,
"signal/advantage_pre_scale_abs_mean": 0.07417571395635605,
"signal/advantage_pre_scale_std": 0.11395351439714432,
"signal/advantage_std": 0.11395351439714432,
"signal/brier_reward/centered_abs_mean": 0.12301892042160034,
"signal/brier_reward/group_bin_occupancy": 0.8390625,
"signal/brier_reward/group_std_mean": 0.15880888998508452,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015377365052700043,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015377365052700043,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033534748945385218,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69921875,
"signal/frontier_aurc_reward/group_std_mean": 0.005596455931663513,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.002719528623857e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.002719528623857e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16604825258255004,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_1/group_std_mean": 0.21365970373153687,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00297226351685822,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00297226351685822,
"signal/frontier_coverage_10/centered_abs_mean": 0.1656820148229599,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_10/group_std_mean": 0.21318538784980773,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029657080769538878,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029657080769538878,
"signal/frontier_coverage_15/centered_abs_mean": 0.14318577647209169,
"signal/frontier_coverage_15/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_15/group_std_mean": 0.1846143424510956,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025630252901464702,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025630252901464702,
"signal/frontier_coverage_20/centered_abs_mean": 0.0882651075720787,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_20/group_std_mean": 0.11416510492563248,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001579945394769311,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001579945394769311,
"signal/frontier_coverage_25/centered_abs_mean": 0.05941323563456535,
"signal/frontier_coverage_25/group_bin_occupancy": 0.937109375,
"signal/frontier_coverage_25/group_std_mean": 0.075755076110363,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010634968522936106,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010634968522936106,
"signal/frontier_coverage_5/centered_abs_mean": 0.16604825258255004,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_5/group_std_mean": 0.21365970373153687,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00297226351685822,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00297226351685822,
"signal/frontier_ece_reward/centered_abs_mean": 0.0036736581940203907,
"signal/frontier_ece_reward/group_bin_occupancy": 0.623828125,
"signal/frontier_ece_reward/group_std_mean": 0.004686945211142301,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045920727425254884,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045920727425254884,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26177450716495515,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33326379060745237,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032721813395619394,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032721813395619394,
"step": 275
},
{
"calibration/aurc": 0.34579738475043287,
"calibration/batch_distribution_entropy": 0.9820375851432381,
"calibration/batch_entropy_100bins": 0.9662901559276893,
"calibration/batch_entropy_10bins": 0.9820375851432381,
"calibration/batch_entropy_50bins": 0.9762958973616612,
"calibration/batch_uniqueness": 0.9531724427664274,
"calibration/buffer_distribution_entropy": 0.9989790305500732,
"calibration/buffer_entropy_100bins": 0.9990994457185127,
"calibration/buffer_entropy_10bins": 0.9989790305500732,
"calibration/buffer_entropy_50bins": 0.9991439357595896,
"calibration/confidence_entropy": 0.49993937105601816,
"calibration/coverage@0%": 0.02343979329745597,
"calibration/coverage@1%": 0.02343979329745597,
"calibration/coverage@10%": 0.05156479329745597,
"calibration/coverage@15%": 0.138674168297456,
"calibration/coverage@20%": 0.2552195450097847,
"calibration/coverage@25%": 0.3666271709882583,
"calibration/coverage@30%": 0.4494916523972603,
"calibration/coverage@5%": 0.04062729329745597,
"calibration/ece": 0.1285110208907984,
"calibration/mean_confidence": 0.5151410765100136,
"calibration/prompt_uniqueness": 0.8424560800923517,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 760.6,
"completions/max_terminated_length": 579.8,
"completions/mean_length": 246.29541015625,
"completions/mean_terminated_length": 246.169140625,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.896,
"grad_norm": 0.000937454926315695,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 958701109.0,
"reward": 0.8541659832000732,
"reward_std": 0.08857241421937942,
"rewards/accuracy_reward": 0.5396484375,
"rewards/brier_reward": 0.7998676776885987,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002759965811856091,
"rewards/frontier_coverage_1": 0.11034291237592697,
"rewards/frontier_coverage_10": 0.11029749512672424,
"rewards/frontier_coverage_15": 0.10030711442232132,
"rewards/frontier_coverage_20": 0.06749739050865174,
"rewards/frontier_coverage_25": 0.047641870379447934,
"rewards/frontier_coverage_5": 0.11033990383148193,
"rewards/frontier_ece_reward": 0.0017286977032199501,
"rewards/frontier_entropy_batch_reward": -0.20393397510051728,
"signal/accuracy_reward/centered_abs_mean": 0.0900390625,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.12443099468946457,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04501953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04501953125,
"signal/advantage_abs_mean": 0.06760159730911255,
"signal/advantage_pre_scale_abs_mean": 0.06760159730911255,
"signal/advantage_pre_scale_std": 0.10470890700817108,
"signal/advantage_std": 0.10470890700817108,
"signal/brier_reward/centered_abs_mean": 0.11129257977008819,
"signal/brier_reward/group_bin_occupancy": 0.850390625,
"signal/brier_reward/group_std_mean": 0.14382209181785582,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013911572471261024,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013911572471261024,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023095492739230393,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73359375,
"signal/frontier_aurc_reward/group_std_mean": 0.0037586647551506756,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.134093142056372e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.134093142056372e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1561950832605362,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_1/group_std_mean": 0.20052540600299834,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027958919294178487,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027958919294178487,
"signal/frontier_coverage_10/centered_abs_mean": 0.15565426647663116,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.199809730052948,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002786211296916008,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002786211296916008,
"signal/frontier_coverage_15/centered_abs_mean": 0.13624198436737062,
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
"signal/frontier_coverage_15/group_std_mean": 0.1747460901737213,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024387314915657043,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024387314915657043,
"signal/frontier_coverage_20/centered_abs_mean": 0.0824216440320015,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.1063196137547493,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014753472525626421,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014753472525626421,
"signal/frontier_coverage_25/centered_abs_mean": 0.053366570919752124,
"signal/frontier_coverage_25/group_bin_occupancy": 0.906640625,
"signal/frontier_coverage_25/group_std_mean": 0.06851721107959748,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009552616043947637,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009552616043947637,
"signal/frontier_coverage_5/centered_abs_mean": 0.1561916083097458,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_5/group_std_mean": 0.20052067935466766,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002795829763635993,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002795829763635993,
"signal/frontier_ece_reward/centered_abs_mean": 0.0035572517197579147,
"signal/frontier_ece_reward/group_bin_occupancy": 0.58828125,
"signal/frontier_ece_reward/group_std_mean": 0.004552530776709318,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044465646496973934,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044465646496973934,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.276011261343956,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35015120506286623,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0345014076679945,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0345014076679945,
"step": 280
},
{
"calibration/aurc": 0.35540442785278614,
"calibration/batch_distribution_entropy": 0.9746369661753949,
"calibration/batch_entropy_100bins": 0.9632824538548672,
"calibration/batch_entropy_10bins": 0.9746369661753949,
"calibration/batch_entropy_50bins": 0.973053929289723,
"calibration/batch_uniqueness": 0.951324462890625,
"calibration/buffer_distribution_entropy": 0.9989514375360533,
"calibration/buffer_entropy_100bins": 0.9990828334202781,
"calibration/buffer_entropy_10bins": 0.9989514375360533,
"calibration/buffer_entropy_50bins": 0.9991101689613113,
"calibration/confidence_entropy": 0.5108973531224291,
"calibration/coverage@0%": 0.00703125,
"calibration/coverage@1%": 0.00703125,
"calibration/coverage@10%": 0.090234375,
"calibration/coverage@15%": 0.190234375,
"calibration/coverage@20%": 0.295703125,
"calibration/coverage@25%": 0.3859375,
"calibration/coverage@30%": 0.460546875,
"calibration/coverage@5%": 0.03515625,
"calibration/ece": 0.14220309508484869,
"calibration/mean_confidence": 0.4849877995867112,
"calibration/prompt_uniqueness": 0.84873046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 741.0,
"completions/max_terminated_length": 551.2,
"completions/mean_length": 241.6802734375,
"completions/mean_terminated_length": 241.55431518554687,
"completions/min_length": 119.8,
"completions/min_terminated_length": 119.8,
"epoch": 0.912,
"grad_norm": 0.0008511711494065821,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 976227211.0,
"reward": 0.8543228030204773,
"reward_std": 0.08962784111499786,
"rewards/accuracy_reward": 0.5455078125,
"rewards/brier_reward": 0.7963875532150269,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0024389707017689944,
"rewards/frontier_coverage_1": 0.09579984173178673,
"rewards/frontier_coverage_10": 0.09567191377282143,
"rewards/frontier_coverage_15": 0.0898715415969491,
"rewards/frontier_coverage_20": 0.06119959354400635,
"rewards/frontier_coverage_25": 0.04600660875439644,
"rewards/frontier_coverage_5": 0.09579412266612053,
"rewards/frontier_ece_reward": 0.0018233929062262178,
"rewards/frontier_entropy_batch_reward": -0.2134964257478714,
"signal/accuracy_reward/centered_abs_mean": 0.0875244140625,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.12260164320468903,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04376220703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04376220703125,
"signal/advantage_abs_mean": 0.06862077489495277,
"signal/advantage_pre_scale_abs_mean": 0.06862077489495277,
"signal/advantage_pre_scale_std": 0.10393733531236649,
"signal/advantage_std": 0.10393733531236649,
"signal/brier_reward/centered_abs_mean": 0.11736378222703933,
"signal/brier_reward/group_bin_occupancy": 0.855859375,
"signal/brier_reward/group_std_mean": 0.15200705230236053,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014670472778379916,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014670472778379916,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021737504750490187,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.711328125,
"signal/frontier_aurc_reward/group_std_mean": 0.003746302565559745,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.891013257089071e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.891013257089071e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16225638389587402,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.20896171331405639,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029043891932815312,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029043891932815312,
"signal/frontier_coverage_10/centered_abs_mean": 0.16155781745910644,
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_10/group_std_mean": 0.20806140899658204,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028918846510350704,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028918846510350704,
"signal/frontier_coverage_15/centered_abs_mean": 0.14396750777959824,
"signal/frontier_coverage_15/group_bin_occupancy": 0.871875,
"signal/frontier_coverage_15/group_std_mean": 0.18532683253288268,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025770182721316813,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025770182721316813,
"signal/frontier_coverage_20/centered_abs_mean": 0.0858291208744049,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9,
"signal/frontier_coverage_20/group_std_mean": 0.11078901290893554,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015363412443548442,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015363412443548442,
"signal/frontier_coverage_25/centered_abs_mean": 0.05511407479643822,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9265625,
"signal/frontier_coverage_25/group_std_mean": 0.07155242562294006,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009865418775007128,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009865418775007128,
"signal/frontier_coverage_5/centered_abs_mean": 0.16224364936351776,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.20894473493099214,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029041611589491366,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029041611589491366,
"signal/frontier_ece_reward/centered_abs_mean": 0.003552433103322983,
"signal/frontier_ece_reward/group_bin_occupancy": 0.576953125,
"signal/frontier_ece_reward/group_std_mean": 0.004548510629683733,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044405413791537285,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044405413791537285,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2813218832015991,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3521720230579376,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03516523540019989,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03516523540019989,
"step": 285
},
{
"calibration/aurc": 0.3877844515793165,
"calibration/batch_distribution_entropy": 0.9866501795107988,
"calibration/batch_entropy_100bins": 0.9728511672753379,
"calibration/batch_entropy_10bins": 0.9866501795107988,
"calibration/batch_entropy_50bins": 0.9808825904754377,
"calibration/batch_uniqueness": 0.954571533203125,
"calibration/buffer_distribution_entropy": 0.9990166406847901,
"calibration/buffer_entropy_100bins": 0.9991172063970971,
"calibration/buffer_entropy_10bins": 0.9990166406847901,
"calibration/buffer_entropy_50bins": 0.9991579855854283,
"calibration/confidence_entropy": 0.49825895928661373,
"calibration/coverage@0%": 0.006640625,
"calibration/coverage@1%": 0.006640625,
"calibration/coverage@10%": 0.0171875,
"calibration/coverage@15%": 0.0203125,
"calibration/coverage@20%": 0.083984375,
"calibration/coverage@25%": 0.213671875,
"calibration/coverage@30%": 0.295703125,
"calibration/coverage@5%": 0.006640625,
"calibration/ece": 0.1253656157246842,
"calibration/mean_confidence": 0.5123772352862292,
"calibration/prompt_uniqueness": 0.8548828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 680.2,
"completions/max_terminated_length": 468.0,
"completions/mean_length": 233.83251953125,
"completions/mean_terminated_length": 233.70564880371094,
"completions/min_length": 112.6,
"completions/min_terminated_length": 112.6,
"epoch": 0.928,
"grad_norm": 0.0007553471950814128,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 993648472.0,
"reward": 0.8464865446090698,
"reward_std": 0.08689890056848526,
"rewards/accuracy_reward": 0.52626953125,
"rewards/brier_reward": 0.788770604133606,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0032333484385162594,
"rewards/frontier_coverage_1": 0.11250228732824326,
"rewards/frontier_coverage_10": 0.11221933662891388,
"rewards/frontier_coverage_15": 0.1025825411081314,
"rewards/frontier_coverage_20": 0.06942355185747147,
"rewards/frontier_coverage_25": 0.053556407988071444,
"rewards/frontier_coverage_5": 0.11250228732824326,
"rewards/frontier_ece_reward": 0.0022580260410904884,
"rewards/frontier_entropy_batch_reward": -0.2031704902648926,
"signal/accuracy_reward/centered_abs_mean": 0.084185791015625,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.1131853774189949,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0420928955078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0420928955078125,
"signal/advantage_abs_mean": 0.06731941103935242,
"signal/advantage_pre_scale_abs_mean": 0.06731941103935242,
"signal/advantage_pre_scale_std": 0.10365704894065857,
"signal/advantage_std": 0.10365704894065857,
"signal/brier_reward/centered_abs_mean": 0.1226132184267044,
"signal/brier_reward/group_bin_occupancy": 0.843359375,
"signal/brier_reward/group_std_mean": 0.15720722079277039,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01532665230333805,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01532665230333805,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003174196882173419,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875,
"signal/frontier_aurc_reward/group_std_mean": 0.005680124741047621,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.681812253897078e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.681812253897078e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15731285214424134,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_1/group_std_mean": 0.20189858376979827,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028158999979496003,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028158999979496003,
"signal/frontier_coverage_10/centered_abs_mean": 0.15667309165000914,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_10/group_std_mean": 0.2010861098766327,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002804448362439871,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002804448362439871,
"signal/frontier_coverage_15/centered_abs_mean": 0.13898983597755432,
"signal/frontier_coverage_15/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_15/group_std_mean": 0.1787546008825302,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024879179894924165,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024879179894924165,
"signal/frontier_coverage_20/centered_abs_mean": 0.08620916604995728,
"signal/frontier_coverage_20/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_20/group_std_mean": 0.11133407950401306,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015431440435349942,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015431440435349942,
"signal/frontier_coverage_25/centered_abs_mean": 0.059717252105474475,
"signal/frontier_coverage_25/group_bin_occupancy": 0.925,
"signal/frontier_coverage_25/group_std_mean": 0.07652692198753357,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010689388029277325,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010689388029277325,
"signal/frontier_coverage_5/centered_abs_mean": 0.15731285214424134,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_5/group_std_mean": 0.20189858376979827,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028158999979496003,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028158999979496003,
"signal/frontier_ece_reward/centered_abs_mean": 0.0038206770084798338,
"signal/frontier_ece_reward/group_bin_occupancy": 0.584765625,
"signal/frontier_ece_reward/group_std_mean": 0.004873193427920341,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004775846260599792,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004775846260599792,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.266440337896347,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34233739972114563,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03330504223704338,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03330504223704338,
"step": 290
},
{
"calibration/aurc": 0.2447793406319588,
"calibration/batch_distribution_entropy": 0.9874497094930328,
"calibration/batch_entropy_100bins": 0.9716391593970762,
"calibration/batch_entropy_10bins": 0.9874497094930328,
"calibration/batch_entropy_50bins": 0.9810979705902847,
"calibration/batch_uniqueness": 0.9539276123046875,
"calibration/buffer_distribution_entropy": 0.9989772205264174,
"calibration/buffer_entropy_100bins": 0.9990865812417503,
"calibration/buffer_entropy_10bins": 0.9989772205264174,
"calibration/buffer_entropy_50bins": 0.9991286661644855,
"calibration/confidence_entropy": 0.5002220328331736,
"calibration/coverage@0%": 0.03515625,
"calibration/coverage@1%": 0.03515625,
"calibration/coverage@10%": 0.261328125,
"calibration/coverage@15%": 0.34765625,
"calibration/coverage@20%": 0.43671875,
"calibration/coverage@25%": 0.531640625,
"calibration/coverage@30%": 0.608984375,
"calibration/coverage@5%": 0.149609375,
"calibration/ece": 0.09089448170709025,
"calibration/mean_confidence": 0.4852771091860312,
"calibration/prompt_uniqueness": 0.844384765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 690.6,
"completions/max_terminated_length": 481.0,
"completions/mean_length": 231.37978515625,
"completions/mean_terminated_length": 231.25249328613282,
"completions/min_length": 114.2,
"completions/min_terminated_length": 114.2,
"epoch": 0.944,
"grad_norm": 0.0009843307780101895,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 1010993225.0,
"reward": 0.8495798826217651,
"reward_std": 0.0940755695104599,
"rewards/accuracy_reward": 0.53564453125,
"rewards/brier_reward": 0.789400064945221,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002888223179616034,
"rewards/frontier_coverage_1": 0.10932025760412216,
"rewards/frontier_coverage_10": 0.10875759422779083,
"rewards/frontier_coverage_15": 0.10018027424812317,
"rewards/frontier_coverage_20": 0.06886630058288574,
"rewards/frontier_coverage_25": 0.049372269213199614,
"rewards/frontier_coverage_5": 0.10932025760412216,
"rewards/frontier_ece_reward": 0.0021821844391524793,
"rewards/frontier_entropy_batch_reward": -0.2144875019788742,
"signal/accuracy_reward/centered_abs_mean": 0.105511474609375,
"signal/accuracy_reward/group_bin_occupancy": 0.177734375,
"signal/accuracy_reward/group_std_mean": 0.1422753319144249,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0527557373046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0527557373046875,
"signal/advantage_abs_mean": 0.07274120301008224,
"signal/advantage_pre_scale_abs_mean": 0.07274120301008224,
"signal/advantage_pre_scale_std": 0.11103657335042953,
"signal/advantage_std": 0.11103657335042953,
"signal/brier_reward/centered_abs_mean": 0.11999978870153427,
"signal/brier_reward/group_bin_occupancy": 0.855859375,
"signal/brier_reward/group_std_mean": 0.153327140212059,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014999973587691784,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014999973587691784,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025067355018109083,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728125,
"signal/frontier_aurc_reward/group_std_mean": 0.004060426913201809,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4870566489407794e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4870566489407794e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1733368456363678,
"signal/frontier_coverage_1/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_1/group_std_mean": 0.2196456164121628,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031027294229716063,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031027294229716063,
"signal/frontier_coverage_10/centered_abs_mean": 0.1724557787179947,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_10/group_std_mean": 0.2185318350791931,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030869582667946817,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030869582667946817,
"signal/frontier_coverage_15/centered_abs_mean": 0.15549071431159972,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_15/group_std_mean": 0.19710105359554292,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027832836378365753,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027832836378365753,
"signal/frontier_coverage_20/centered_abs_mean": 0.09377928972244262,
"signal/frontier_coverage_20/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_20/group_std_mean": 0.11911326348781585,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001678649242967367,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001678649242967367,
"signal/frontier_coverage_25/centered_abs_mean": 0.05974511280655861,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9234375,
"signal/frontier_coverage_25/group_std_mean": 0.07545108199119568,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010694375028833746,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010694375028833746,
"signal/frontier_coverage_5/centered_abs_mean": 0.1733368456363678,
"signal/frontier_coverage_5/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_5/group_std_mean": 0.2196456164121628,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031027294229716063,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031027294229716063,
"signal/frontier_ece_reward/centered_abs_mean": 0.0038582887034863235,
"signal/frontier_ece_reward/group_bin_occupancy": 0.576953125,
"signal/frontier_ece_reward/group_std_mean": 0.004859161656349897,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00048228608793579044,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00048228608793579044,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.277313631772995,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.736328125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35219224691390993,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034664203971624376,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034664203971624376,
"step": 295
},
{
"calibration/aurc": 0.31620081263607647,
"calibration/batch_distribution_entropy": 0.9888025135738457,
"calibration/batch_entropy_100bins": 0.9754164842245048,
"calibration/batch_entropy_10bins": 0.9888025135738457,
"calibration/batch_entropy_50bins": 0.9829728236087112,
"calibration/batch_uniqueness": 0.9548370361328125,
"calibration/buffer_distribution_entropy": 0.9990489479151083,
"calibration/buffer_entropy_100bins": 0.9991271919139264,
"calibration/buffer_entropy_10bins": 0.9990489479151083,
"calibration/buffer_entropy_50bins": 0.9991661909354466,
"calibration/confidence_entropy": 0.4907980481305323,
"calibration/coverage@0%": 0.01171875,
"calibration/coverage@1%": 0.01171875,
"calibration/coverage@10%": 0.14296875,
"calibration/coverage@15%": 0.1796875,
"calibration/coverage@20%": 0.319140625,
"calibration/coverage@25%": 0.4140625,
"calibration/coverage@30%": 0.48125,
"calibration/coverage@5%": 0.07578125,
"calibration/ece": 0.14081409067623374,
"calibration/mean_confidence": 0.5085684695833261,
"calibration/prompt_uniqueness": 0.84306640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 750.4,
"completions/max_terminated_length": 527.8,
"completions/mean_length": 230.4478515625,
"completions/mean_terminated_length": 230.19272155761718,
"completions/min_length": 121.6,
"completions/min_terminated_length": 121.6,
"epoch": 0.96,
"grad_norm": 0.0007394987624138594,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 1028293331.0,
"reward": 0.8490480065345765,
"reward_std": 0.0807856947183609,
"rewards/accuracy_reward": 0.5232421875,
"rewards/brier_reward": 0.805585753917694,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002769366092979908,
"rewards/frontier_coverage_1": 0.1345707431435585,
"rewards/frontier_coverage_10": 0.13391480296850206,
"rewards/frontier_coverage_15": 0.12401831150054932,
"rewards/frontier_coverage_20": 0.08151039481163025,
"rewards/frontier_coverage_25": 0.05505374222993851,
"rewards/frontier_coverage_5": 0.13456859886646272,
"rewards/frontier_ece_reward": 0.0024473052471876144,
"rewards/frontier_entropy_batch_reward": -0.2024726927280426,
"signal/accuracy_reward/centered_abs_mean": 0.07801513671875,
"signal/accuracy_reward/group_bin_occupancy": 0.165625,
"signal/accuracy_reward/group_std_mean": 0.10689240992069245,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039007568359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039007568359375,
"signal/advantage_abs_mean": 0.0619741216301918,
"signal/advantage_pre_scale_abs_mean": 0.0619741216301918,
"signal/advantage_pre_scale_std": 0.09630223214626313,
"signal/advantage_std": 0.09630223214626313,
"signal/brier_reward/centered_abs_mean": 0.11064407974481583,
"signal/brier_reward/group_bin_occupancy": 0.850390625,
"signal/brier_reward/group_std_mean": 0.14350316524505616,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013830509968101978,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013830509968101978,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022713606245815753,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375,
"signal/frontier_aurc_reward/group_std_mean": 0.0036464712116867303,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0657354111317547e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0657354111317547e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1542697876691818,
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_1/group_std_mean": 0.19899408221244813,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027614288963377477,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027614288963377477,
"signal/frontier_coverage_10/centered_abs_mean": 0.15353223979473113,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.19805727005004883,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002748226933181286,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002748226933181286,
"signal/frontier_coverage_15/centered_abs_mean": 0.14255075454711913,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.18408787548542022,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025516584049910308,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025516584049910308,
"signal/frontier_coverage_20/centered_abs_mean": 0.08376922607421874,
"signal/frontier_coverage_20/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_20/group_std_mean": 0.10870558023452759,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014994690660387277,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014994690660387277,
"signal/frontier_coverage_25/centered_abs_mean": 0.05670462995767593,
"signal/frontier_coverage_25/group_bin_occupancy": 0.92265625,
"signal/frontier_coverage_25/group_std_mean": 0.07235517054796219,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010150128742679953,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010150128742679953,
"signal/frontier_coverage_5/centered_abs_mean": 0.15426267683506012,
"signal/frontier_coverage_5/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_5/group_std_mean": 0.1989847391843796,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00276130186393857,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00276130186393857,
"signal/frontier_ece_reward/centered_abs_mean": 0.003688620775938034,
"signal/frontier_ece_reward/group_bin_occupancy": 0.5734375,
"signal/frontier_ece_reward/group_std_mean": 0.004731486923992634,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00046107759699225425,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00046107759699225425,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26567680239677427,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72421875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34176658391952514,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033209600299596784,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033209600299596784,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.4157009406435348,
"eval_calibration/batch_distribution_entropy": 0.9144449702126978,
"eval_calibration/batch_entropy_100bins": 0.6990601166122136,
"eval_calibration/batch_entropy_10bins": 0.9144449702126978,
"eval_calibration/batch_entropy_50bins": 0.7689095013084135,
"eval_calibration/batch_uniqueness": 0.8984375,
"eval_calibration/buffer_distribution_entropy": 0.9989272187954801,
"eval_calibration/buffer_entropy_100bins": 0.9990825487296975,
"eval_calibration/buffer_entropy_10bins": 0.9989272187954801,
"eval_calibration/buffer_entropy_50bins": 0.9991098135800613,
"eval_calibration/confidence_entropy": 0.48430149279421963,
"eval_calibration/coverage@0%": 0.09375,
"eval_calibration/coverage@1%": 0.09375,
"eval_calibration/coverage@10%": 0.09375,
"eval_calibration/coverage@15%": 0.1640625,
"eval_calibration/coverage@20%": 0.2578125,
"eval_calibration/coverage@25%": 0.296875,
"eval_calibration/coverage@30%": 0.328125,
"eval_calibration/coverage@5%": 0.09375,
"eval_calibration/ece": 0.17053071360991148,
"eval_calibration/mean_confidence": 0.4246910924677646,
"eval_calibration/prompt_uniqueness": 0.8984375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 374.75,
"eval_completions/max_terminated_length": 374.75,
"eval_completions/mean_length": 230.63119506835938,
"eval_completions/mean_terminated_length": 230.63119506835938,
"eval_completions/min_length": 139.0,
"eval_completions/min_terminated_length": 139.0,
"eval_loss": 0.0,
"eval_num_tokens": 1028293331.0,
"eval_reward": 0.710255429148674,
"eval_reward_std": 0.22263594716787338,
"eval_rewards/accuracy_reward": 0.43359375,
"eval_rewards/brier_reward": 0.8085650652647018,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0028918907046318054,
"eval_rewards/frontier_coverage_1": 0.19964230805635452,
"eval_rewards/frontier_coverage_10": 0.19866662845015526,
"eval_rewards/frontier_coverage_15": 0.184698436409235,
"eval_rewards/frontier_coverage_20": 0.1101516131311655,
"eval_rewards/frontier_coverage_25": 0.0620901882648468,
"eval_rewards/frontier_coverage_5": 0.1996377371251583,
"eval_rewards/frontier_ece_reward": 0.0027777274372056127,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 20.2818,
"eval_samples_per_second": 24.653,
"eval_signal/accuracy_reward/centered_abs_mean": 0.47607421875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4955107420682907,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.238037109375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.238037109375,
"eval_signal/advantage_abs_mean": 0.20791196078062057,
"eval_signal/advantage_pre_scale_abs_mean": 0.20791196078062057,
"eval_signal/advantage_pre_scale_std": 0.22018880769610405,
"eval_signal/advantage_std": 0.22018880769610405,
"eval_signal/brier_reward/centered_abs_mean": 0.18127229064702988,
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
"eval_signal/brier_reward/group_std_mean": 0.22954664751887321,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022659036330878735,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022659036330878735,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00344617961673066,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006800854112952948,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.168661366245942e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.168661366245942e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3764362931251526,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4541953206062317,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006738209398463368,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006738209398463368,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.374702051281929,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4522312879562378,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006707166787236929,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006707166787236929,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.34953027218580246,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4235554412007332,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006256591761484742,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006256591761484742,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.19098489359021187,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9453125,
"eval_signal/frontier_coverage_20/group_std_mean": 0.23890389502048492,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003418629406951368,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003418629406951368,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0917560514062643,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11645574681460857,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016424332570750266,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016424332570750266,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3764154985547066,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.45417140424251556,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0067378373350948095,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0067378373350948095,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.005336694768629968,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.006381870131008327,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000667086846078746,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000667086846078746,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.197,
"step": 300
},
{
"calibration/aurc": 0.26110004370907136,
"calibration/batch_distribution_entropy": 0.9663232191264044,
"calibration/batch_entropy_100bins": 0.9602856930043288,
"calibration/batch_entropy_10bins": 0.9663232191264044,
"calibration/batch_entropy_50bins": 0.9700703532954462,
"calibration/batch_uniqueness": 0.9515728882667596,
"calibration/buffer_distribution_entropy": 0.9989411083119799,
"calibration/buffer_entropy_100bins": 0.9990920989566723,
"calibration/buffer_entropy_10bins": 0.9989411083119799,
"calibration/buffer_entropy_50bins": 0.9991269827910889,
"calibration/confidence_entropy": 0.5043553017329548,
"calibration/coverage@0%": 0.03242340386497065,
"calibration/coverage@1%": 0.03242340386497065,
"calibration/coverage@10%": 0.19649201932485322,
"calibration/coverage@15%": 0.3742263943248532,
"calibration/coverage@20%": 0.47579424535225046,
"calibration/coverage@25%": 0.5375168175146772,
"calibration/coverage@30%": 0.6078407840019568,
"calibration/coverage@5%": 0.08125152886497064,
"calibration/ece": 0.1224501044294222,
"calibration/mean_confidence": 0.5046927209883952,
"calibration/prompt_uniqueness": 0.8447966290810353,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 923.2,
"completions/max_terminated_length": 544.6,
"completions/mean_length": 231.37236328125,
"completions/mean_terminated_length": 230.86200561523438,
"completions/min_length": 116.8,
"completions/min_terminated_length": 116.8,
"epoch": 0.976,
"grad_norm": 0.0011766533134505153,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 1045523704.0,
"reward": 0.8560734391212463,
"reward_std": 0.08937492370605468,
"rewards/accuracy_reward": 0.54404296875,
"rewards/brier_reward": 0.8007814288139343,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0027466853614896538,
"rewards/frontier_coverage_1": 0.11451977603137493,
"rewards/frontier_coverage_10": 0.11409206595271826,
"rewards/frontier_coverage_15": 0.10787402391433716,
"rewards/frontier_coverage_20": 0.07268583029508591,
"rewards/frontier_coverage_25": 0.052094388753175735,
"rewards/frontier_coverage_5": 0.11450284756720067,
"rewards/frontier_ece_reward": 0.0022714813821949065,
"rewards/frontier_entropy_batch_reward": -0.21035043001174927,
"signal/accuracy_reward/centered_abs_mean": 0.094769287109375,
"signal/accuracy_reward/group_bin_occupancy": 0.1703125,
"signal/accuracy_reward/group_std_mean": 0.1257157877087593,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0473846435546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0473846435546875,
"signal/advantage_abs_mean": 0.06940693110227585,
"signal/advantage_pre_scale_abs_mean": 0.06940693110227585,
"signal/advantage_pre_scale_std": 0.10633570104837417,
"signal/advantage_std": 0.10633570104837417,
"signal/brier_reward/centered_abs_mean": 0.11308815479278564,
"signal/brier_reward/group_bin_occupancy": 0.853125,
"signal/brier_reward/group_std_mean": 0.1454048365354538,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014136019349098205,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014136019349098205,
"signal/format_reward/centered_abs_mean": 0.001123046875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0029782545287162067,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023753143846988677,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625,
"signal/frontier_aurc_reward/group_std_mean": 0.003789714723825455,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.251812424627133e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.251812424627133e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16219232231378555,
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_1/group_std_mean": 0.20617010891437532,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029032424092292784,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029032424092292784,
"signal/frontier_coverage_10/centered_abs_mean": 0.16142708957195281,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.2052207589149475,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028895447496324776,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028895447496324776,
"signal/frontier_coverage_15/centered_abs_mean": 0.1506349816918373,
"signal/frontier_coverage_15/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_15/group_std_mean": 0.19179299771785735,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026963659562170505,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026963659562170505,
"signal/frontier_coverage_20/centered_abs_mean": 0.08505127876996994,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_20/group_std_mean": 0.1089501440525055,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015224177855998277,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015224177855998277,
"signal/frontier_coverage_25/centered_abs_mean": 0.05604914203286171,
"signal/frontier_coverage_25/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_25/group_std_mean": 0.07139091566205025,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001003279653377831,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001003279653377831,
"signal/frontier_coverage_5/centered_abs_mean": 0.16217613518238067,
"signal/frontier_coverage_5/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_5/group_std_mean": 0.20614968240261078,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029029527213424444,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029029527213424444,
"signal/frontier_ece_reward/centered_abs_mean": 0.0038645747117698193,
"signal/frontier_ece_reward/group_bin_occupancy": 0.58828125,
"signal/frontier_ece_reward/group_std_mean": 0.004852784611284733,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004830718389712274,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004830718389712274,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2707963943481445,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3448775112628937,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033849549293518064,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033849549293518064,
"step": 305
},
{
"calibration/aurc": 0.3658527640919357,
"calibration/batch_distribution_entropy": 0.9803335563539537,
"calibration/batch_entropy_100bins": 0.9691597495026512,
"calibration/batch_entropy_10bins": 0.9803335563539537,
"calibration/batch_entropy_50bins": 0.9781998927735055,
"calibration/batch_uniqueness": 0.953106689453125,
"calibration/buffer_distribution_entropy": 0.9989034102433759,
"calibration/buffer_entropy_100bins": 0.9990778744277339,
"calibration/buffer_entropy_10bins": 0.9989034102433759,
"calibration/buffer_entropy_50bins": 0.9991183500383848,
"calibration/confidence_entropy": 0.4943518091239916,
"calibration/coverage@0%": 0.016015625,
"calibration/coverage@1%": 0.016015625,
"calibration/coverage@10%": 0.06015625,
"calibration/coverage@15%": 0.10859375,
"calibration/coverage@20%": 0.154296875,
"calibration/coverage@25%": 0.197265625,
"calibration/coverage@30%": 0.398046875,
"calibration/coverage@5%": 0.020703125,
"calibration/ece": 0.1389597168024852,
"calibration/mean_confidence": 0.46682059475391463,
"calibration/prompt_uniqueness": 0.8416015625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 463.6,
"completions/max_terminated_length": 463.6,
"completions/mean_length": 231.00029296875,
"completions/mean_terminated_length": 231.00029296875,
"completions/min_length": 127.2,
"completions/min_terminated_length": 127.2,
"epoch": 0.992,
"grad_norm": 0.0008861988899298012,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 1063017627.0,
"reward": 0.8441213011741638,
"reward_std": 0.08615548759698868,
"rewards/accuracy_reward": 0.52431640625,
"rewards/brier_reward": 0.8000134348869323,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002819360885769129,
"rewards/frontier_coverage_1": 0.1251816540956497,
"rewards/frontier_coverage_10": 0.12461385577917099,
"rewards/frontier_coverage_15": 0.11719222217798234,
"rewards/frontier_coverage_20": 0.07521957084536553,
"rewards/frontier_coverage_25": 0.05361300930380821,
"rewards/frontier_coverage_5": 0.1251749500632286,
"rewards/frontier_ece_reward": 0.0021149621577933432,
"rewards/frontier_entropy_batch_reward": -0.2345559537410736,
"signal/accuracy_reward/centered_abs_mean": 0.082110595703125,
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
"signal/accuracy_reward/group_std_mean": 0.11262907832860947,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0410552978515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0410552978515625,
"signal/advantage_abs_mean": 0.06661349236965179,
"signal/advantage_pre_scale_abs_mean": 0.06661349236965179,
"signal/advantage_pre_scale_std": 0.1012403666973114,
"signal/advantage_std": 0.1012403666973114,
"signal/brier_reward/centered_abs_mean": 0.10932144820690155,
"signal/brier_reward/group_bin_occupancy": 0.855859375,
"signal/brier_reward/group_std_mean": 0.13959217667579651,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013665181025862694,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013665181025862694,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023461440578103065,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721875,
"signal/frontier_aurc_reward/group_std_mean": 0.0038652042858302593,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1995976789621634e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1995976789621634e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15163334608078002,
"signal/frontier_coverage_1/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_1/group_std_mean": 0.19423333406448365,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027142366860061886,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027142366860061886,
"signal/frontier_coverage_10/centered_abs_mean": 0.15079353153705596,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_10/group_std_mean": 0.1931879073381424,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026992041151970626,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026992041151970626,
"signal/frontier_coverage_15/centered_abs_mean": 0.1394236296415329,
"signal/frontier_coverage_15/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_15/group_std_mean": 0.17897272109985352,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002495682844892144,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002495682844892144,
"signal/frontier_coverage_20/centered_abs_mean": 0.07920315265655517,
"signal/frontier_coverage_20/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_20/group_std_mean": 0.10239728689193725,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014177364064380527,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014177364064380527,
"signal/frontier_coverage_25/centered_abs_mean": 0.05471629798412323,
"signal/frontier_coverage_25/group_bin_occupancy": 0.92109375,
"signal/frontier_coverage_25/group_std_mean": 0.07026181817054748,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009794216603040695,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009794216603040695,
"signal/frontier_coverage_5/centered_abs_mean": 0.1516157403588295,
"signal/frontier_coverage_5/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_5/group_std_mean": 0.1942117065191269,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713921666145325,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713921666145325,
"signal/frontier_ece_reward/centered_abs_mean": 0.003750180173665285,
"signal/frontier_ece_reward/group_bin_occupancy": 0.585546875,
"signal/frontier_ece_reward/group_std_mean": 0.0047975870780646804,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004687725217081606,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004687725217081606,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29427412152290344,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.727734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36567636132240294,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03678426519036293,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03678426519036293,
"step": 310
},
{
"calibration/aurc": 0.27186836607159154,
"calibration/batch_distribution_entropy": 0.9591700570502693,
"calibration/batch_entropy_100bins": 0.9570191564453638,
"calibration/batch_entropy_10bins": 0.9591700570502693,
"calibration/batch_entropy_50bins": 0.963697706521873,
"calibration/batch_uniqueness": 0.9496231079101562,
"calibration/buffer_distribution_entropy": 0.9989876036497771,
"calibration/buffer_entropy_100bins": 0.9991295514741796,
"calibration/buffer_entropy_10bins": 0.9989876036497771,
"calibration/buffer_entropy_50bins": 0.9991822719110717,
"calibration/confidence_entropy": 0.48186544687195904,
"calibration/coverage@0%": 0.02734375,
"calibration/coverage@1%": 0.02734375,
"calibration/coverage@10%": 0.0556640625,
"calibration/coverage@15%": 0.08203125,
"calibration/coverage@20%": 0.2568359375,
"calibration/coverage@25%": 0.587890625,
"calibration/coverage@30%": 0.7333984375,
"calibration/coverage@5%": 0.0341796875,
"calibration/ece": 0.16727483770900242,
"calibration/mean_confidence": 0.5957875381649225,
"calibration/prompt_uniqueness": 0.8248291015625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 421.5,
"completions/max_terminated_length": 421.5,
"completions/mean_length": 230.24088287353516,
"completions/mean_terminated_length": 230.24088287353516,
"completions/min_length": 128.5,
"completions/min_terminated_length": 128.5,
"epoch": 0.9984,
"num_tokens": 1069964559.0,
"reward": 0.8514951169490814,
"reward_std": 0.09036770090460777,
"rewards/accuracy_reward": 0.549072265625,
"rewards/brier_reward": 0.7759084403514862,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002963867736980319,
"rewards/frontier_coverage_1": 0.07437552884221077,
"rewards/frontier_coverage_10": 0.07389985024929047,
"rewards/frontier_coverage_15": 0.06876883283257484,
"rewards/frontier_coverage_20": 0.04556947015225887,
"rewards/frontier_coverage_25": 0.03713721036911011,
"rewards/frontier_coverage_5": 0.0743844173848629,
"rewards/frontier_ece_reward": 0.001525860745459795,
"rewards/frontier_entropy_batch_reward": -0.21491432189941406,
"signal/accuracy_reward/centered_abs_mean": 0.0856781005859375,
"signal/accuracy_reward/group_bin_occupancy": 0.1708984375,
"signal/accuracy_reward/group_std_mean": 0.11928322166204453,
"signal/accuracy_reward/group_zero_std_frac": 0.6328125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04283905029296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04283905029296875,
"signal/advantage_abs_mean": 0.0704660713672638,
"signal/advantage_pre_scale_abs_mean": 0.0704660713672638,
"signal/advantage_pre_scale_std": 0.10723469033837318,
"signal/advantage_std": 0.10723469033837318,
"signal/brier_reward/centered_abs_mean": 0.11325568333268166,
"signal/brier_reward/group_bin_occupancy": 0.880859375,
"signal/brier_reward/group_std_mean": 0.1432887762784958,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014156960416585207,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014156960416585207,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002491934224963188,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.736328125,
"signal/frontier_aurc_reward/group_std_mean": 0.0038100657984614372,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.460562558961101e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.460562558961101e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14216963946819305,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8759765625,
"signal/frontier_coverage_1/group_std_mean": 0.1839248612523079,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025448364904150367,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025448364904150367,
"signal/frontier_coverage_10/centered_abs_mean": 0.14118493348360062,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8779296875,
"signal/frontier_coverage_10/group_std_mean": 0.18262220919132233,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002527210279367864,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002527210279367864,
"signal/frontier_coverage_15/centered_abs_mean": 0.13103638216853142,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8720703125,
"signal/frontier_coverage_15/group_std_mean": 0.16926757991313934,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002345551154576242,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002345551154576242,
"signal/frontier_coverage_20/centered_abs_mean": 0.07273482158780098,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8798828125,
"signal/frontier_coverage_20/group_std_mean": 0.09434954449534416,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00130195333622396,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00130195333622396,
"signal/frontier_coverage_25/centered_abs_mean": 0.048997994512319565,
"signal/frontier_coverage_25/group_bin_occupancy": 0.921875,
"signal/frontier_coverage_25/group_std_mean": 0.06378625705838203,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008770640706643462,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008770640706643462,
"signal/frontier_coverage_5/centered_abs_mean": 0.1421535238623619,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8759765625,
"signal/frontier_coverage_5/group_std_mean": 0.18390395492315292,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025445478968322277,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025445478968322277,
"signal/frontier_ece_reward/centered_abs_mean": 0.00375261134468019,
"signal/frontier_ece_reward/group_bin_occupancy": 0.609375,
"signal/frontier_ece_reward/group_std_mean": 0.0048857699148356915,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00046907641808502376,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00046907641808502376,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28836265206336975,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7119140625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3610518276691437,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03604533150792122,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03604533150792122,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.0038806597355384533,
"train_runtime": 60786.4201,
"train_samples_per_second": 0.329,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1069964559,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}