{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6303003664973054, "calibration/batch_distribution_entropy": 0.6597844034096975, "calibration/batch_entropy_100bins": 0.4873712088023803, "calibration/batch_entropy_10bins": 0.6597844034096975, "calibration/batch_entropy_50bins": 0.5698783479346161, "calibration/batch_uniqueness": 0.7280114042860303, "calibration/confidence_entropy": 0.34552134095532827, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.502710703849331, "calibration/mean_confidence": 0.7903732547788247, "calibration/prompt_uniqueness": 0.5916048209774899, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03564453125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1493.6, "completions/mean_length": 272.20166015625, "completions/mean_terminated_length": 225.48247375488282, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.03546224907040596, "learning_rate": 3.1249999999999997e-07, "loss": 0.069, "num_tokens": 17631377.0, "reward": 0.49061959981918335, "reward_std": 0.39449103474617003, "rewards/accuracy_reward": 0.21728515625, "rewards/brier_reward": 0.3733718991279602, "rewards/format_reward": 0.67998046875, "rewards/frontier_aurc_reward": 0.30710798501968384, "rewards/frontier_coverage_1": 0.30710798501968384, "rewards/frontier_coverage_10": 0.30710798501968384, "rewards/frontier_coverage_15": 0.30710798501968384, "rewards/frontier_coverage_20": 0.30710798501968384, "rewards/frontier_coverage_25": 0.30710798501968384, "rewards/frontier_coverage_5": 0.30710798501968384, "rewards/frontier_ece_reward": 0.30710798501968384, "rewards/frontier_entropy_batch_reward": -0.6524304747581482, "signal/accuracy_reward/centered_abs_mean": 0.238629150390625, "signal/accuracy_reward/group_bin_occupancy": 0.21015625, "signal/accuracy_reward/group_std_mean": 0.28090503215789797, "signal/accuracy_reward/group_zero_std_frac": 0.31875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1193145751953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1193145751953125, "signal/advantage_abs_mean": 0.3349871218204498, "signal/advantage_pre_scale_abs_mean": 0.3349871218204498, "signal/advantage_pre_scale_std": 0.40826708674430845, "signal/advantage_std": 0.40826708674430845, "signal/brier_reward/centered_abs_mean": 0.31911089420318606, "signal/brier_reward/group_bin_occupancy": 0.751171875, "signal/brier_reward/group_std_mean": 0.36441142559051515, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03988886177539826, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03988886177539826, "signal/format_reward/centered_abs_mean": 0.404473876953125, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.4542974352836609, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2022369384765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2022369384765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.2967922270298004, "signal/frontier_aurc_reward/group_bin_occupancy": 0.662109375, "signal/frontier_aurc_reward/group_std_mean": 0.34727140665054324, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_1/centered_abs_mean": 0.2967922270298004, "signal/frontier_coverage_1/group_bin_occupancy": 0.662109375, "signal/frontier_coverage_1/group_std_mean": 0.34727140665054324, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_10/centered_abs_mean": 0.2967922270298004, "signal/frontier_coverage_10/group_bin_occupancy": 0.662109375, "signal/frontier_coverage_10/group_std_mean": 0.34727140665054324, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_15/centered_abs_mean": 0.2967922270298004, "signal/frontier_coverage_15/group_bin_occupancy": 0.662109375, "signal/frontier_coverage_15/group_std_mean": 0.34727140665054324, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_20/centered_abs_mean": 0.2967922270298004, "signal/frontier_coverage_20/group_bin_occupancy": 0.662109375, "signal/frontier_coverage_20/group_std_mean": 0.34727140665054324, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_25/centered_abs_mean": 0.2967922270298004, "signal/frontier_coverage_25/group_bin_occupancy": 0.662109375, "signal/frontier_coverage_25/group_std_mean": 0.34727140665054324, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_5/centered_abs_mean": 0.2967922270298004, "signal/frontier_coverage_5/group_bin_occupancy": 0.662109375, "signal/frontier_coverage_5/group_std_mean": 0.34727140665054324, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005312580615282059, "signal/frontier_ece_reward/centered_abs_mean": 0.2967922270298004, "signal/frontier_ece_reward/group_bin_occupancy": 0.662109375, "signal/frontier_ece_reward/group_std_mean": 0.34727140665054324, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03709902837872505, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03709902837872505, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42547852396965025, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.47201172113418577, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05318481549620628, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05318481549620628, "step": 5 }, { "calibration/aurc": 0.6822839581909947, "calibration/batch_distribution_entropy": 0.6427950052801196, "calibration/batch_entropy_100bins": 0.47864642318075556, "calibration/batch_entropy_10bins": 0.6427950052801196, "calibration/batch_entropy_50bins": 0.5601287104037039, "calibration/batch_uniqueness": 0.7195723234061977, "calibration/confidence_entropy": 0.3457734196325082, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5269429998220361, "calibration/mean_confidence": 0.7938687058643216, "calibration/prompt_uniqueness": 0.6126342076368461, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03427734375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1488.2, "completions/mean_length": 255.5763671875, "completions/mean_terminated_length": 210.13367309570313, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.017373288050293922, "learning_rate": 6.249999999999999e-07, "loss": 0.0672, "num_tokens": 35348831.0, "reward": 0.5113436818122864, "reward_std": 0.37235715985298157, "rewards/accuracy_reward": 0.21552734375, "rewards/brier_reward": 0.38588611483573915, "rewards/format_reward": 0.733984375, "rewards/frontier_aurc_reward": 0.3045971155166626, "rewards/frontier_coverage_1": 0.3045971155166626, "rewards/frontier_coverage_10": 0.3045971155166626, "rewards/frontier_coverage_15": 0.3045971155166626, "rewards/frontier_coverage_20": 0.3045971155166626, "rewards/frontier_coverage_25": 0.3045971155166626, "rewards/frontier_coverage_5": 0.3045971155166626, "rewards/frontier_ece_reward": 0.3045971155166626, "rewards/frontier_entropy_batch_reward": -0.7031086683273315, "signal/accuracy_reward/centered_abs_mean": 0.225225830078125, "signal/accuracy_reward/group_bin_occupancy": 0.209765625, "signal/accuracy_reward/group_std_mean": 0.2710058391094208, "signal/accuracy_reward/group_zero_std_frac": 0.321875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1126129150390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1126129150390625, "signal/advantage_abs_mean": 0.30710036158561704, "signal/advantage_pre_scale_abs_mean": 0.30710036158561704, "signal/advantage_pre_scale_std": 0.3872555077075958, "signal/advantage_std": 0.3872555077075958, "signal/brier_reward/centered_abs_mean": 0.3064376533031464, "signal/brier_reward/group_bin_occupancy": 0.766796875, "signal/brier_reward/group_std_mean": 0.35422558784484864, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0383047066628933, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0383047066628933, "signal/format_reward/centered_abs_mean": 0.36297607421875, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.4295056998729706, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.181488037109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.181488037109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.27946594953536985, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6734375, "signal/frontier_aurc_reward/group_std_mean": 0.33357922434806825, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_1/centered_abs_mean": 0.27946594953536985, "signal/frontier_coverage_1/group_bin_occupancy": 0.6734375, "signal/frontier_coverage_1/group_std_mean": 0.33357922434806825, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_10/centered_abs_mean": 0.27946594953536985, "signal/frontier_coverage_10/group_bin_occupancy": 0.6734375, "signal/frontier_coverage_10/group_std_mean": 0.33357922434806825, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_15/centered_abs_mean": 0.27946594953536985, "signal/frontier_coverage_15/group_bin_occupancy": 0.6734375, "signal/frontier_coverage_15/group_std_mean": 0.33357922434806825, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_20/centered_abs_mean": 0.27946594953536985, "signal/frontier_coverage_20/group_bin_occupancy": 0.6734375, "signal/frontier_coverage_20/group_std_mean": 0.33357922434806825, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_25/centered_abs_mean": 0.27946594953536985, "signal/frontier_coverage_25/group_bin_occupancy": 0.6734375, "signal/frontier_coverage_25/group_std_mean": 0.33357922434806825, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_5/centered_abs_mean": 0.27946594953536985, "signal/frontier_coverage_5/group_bin_occupancy": 0.6734375, "signal/frontier_coverage_5/group_std_mean": 0.33357922434806825, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005002440419048071, "signal/frontier_ece_reward/centered_abs_mean": 0.27946594953536985, "signal/frontier_ece_reward/group_bin_occupancy": 0.6734375, "signal/frontier_ece_reward/group_std_mean": 0.33357922434806825, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03493324369192123, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03493324369192123, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39019296765327455, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.31015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.45074942111968996, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04877412095665932, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04877412095665932, "step": 10 }, { "calibration/aurc": 0.5998491425462407, "calibration/batch_distribution_entropy": 0.645822968664817, "calibration/batch_entropy_100bins": 0.4777453417155669, "calibration/batch_entropy_10bins": 0.645822968664817, "calibration/batch_entropy_50bins": 0.5601469875575775, "calibration/batch_uniqueness": 0.706415871160267, "calibration/buffer_distribution_entropy": 0.658197276088968, "calibration/buffer_entropy_100bins": 0.49091085619434366, "calibration/buffer_entropy_10bins": 0.658197276088968, "calibration/buffer_entropy_50bins": 0.5740145738778557, "calibration/confidence_entropy": 0.3450713866685877, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47860442442795315, "calibration/mean_confidence": 0.8054640124402823, "calibration/prompt_uniqueness": 0.6039787026135109, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01513671875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1480.2, "completions/mean_length": 199.25283203125, "completions/mean_terminated_length": 178.8089813232422, "completions/min_length": 13.6, "completions/min_terminated_length": 13.6, "epoch": 0.048, "grad_norm": 0.013462609611451626, "learning_rate": 9.374999999999999e-07, "loss": 0.0458, "num_tokens": 52437916.0, "reward": 0.6119109511375427, "reward_std": 0.2940455138683319, "rewards/accuracy_reward": 0.2783203125, "rewards/brier_reward": 0.492095947265625, "rewards/format_reward": 0.889453125, "rewards/frontier_aurc_reward": 0.2877124358899891, "rewards/frontier_coverage_1": 0.3070936232805252, "rewards/frontier_coverage_10": 0.3070936232805252, "rewards/frontier_coverage_15": 0.3070936232805252, "rewards/frontier_coverage_20": 0.3070936232805252, "rewards/frontier_coverage_25": 0.3070936232805252, "rewards/frontier_coverage_5": 0.3070936232805252, "rewards/frontier_ece_reward": 0.2710249736905098, "rewards/frontier_entropy_batch_reward": -0.8439823746681213, "signal/accuracy_reward/centered_abs_mean": 0.19747314453125, "signal/accuracy_reward/group_bin_occupancy": 0.203515625, "signal/accuracy_reward/group_std_mean": 0.2440613567829132, "signal/accuracy_reward/group_zero_std_frac": 0.371875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.098736572265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.098736572265625, "signal/advantage_abs_mean": 0.2279714286327362, "signal/advantage_pre_scale_abs_mean": 0.2279714286327362, "signal/advantage_pre_scale_std": 0.31036766767501833, "signal/advantage_std": 0.31036766767501833, "signal/brier_reward/centered_abs_mean": 0.272215747833252, "signal/brier_reward/group_bin_occupancy": 0.807421875, "signal/brier_reward/group_std_mean": 0.32673339247703553, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0340269684791565, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0340269684791565, "signal/format_reward/centered_abs_mean": 0.18416748046875, "signal/format_reward/group_bin_occupancy": 0.240625, "signal/format_reward/group_std_mean": 0.28515345454216, "signal/format_reward/group_zero_std_frac": 0.075, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.092083740234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.092083740234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.20810934910550713, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728125, "signal/frontier_aurc_reward/group_std_mean": 0.24877500906586647, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003725157254666556, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003725157254666556, "signal/frontier_coverage_1/centered_abs_mean": 0.23256531208753586, "signal/frontier_coverage_1/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_1/group_std_mean": 0.28642610311508176, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_10/centered_abs_mean": 0.23256531208753586, "signal/frontier_coverage_10/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_10/group_std_mean": 0.28642610311508176, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_15/centered_abs_mean": 0.23256531208753586, "signal/frontier_coverage_15/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_15/group_std_mean": 0.28642610311508176, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_20/centered_abs_mean": 0.23256531208753586, "signal/frontier_coverage_20/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_20/group_std_mean": 0.28642610311508176, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_25/centered_abs_mean": 0.23256531208753586, "signal/frontier_coverage_25/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_25/group_std_mean": 0.28642610311508176, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_5/centered_abs_mean": 0.23256531208753586, "signal/frontier_coverage_5/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_5/group_std_mean": 0.28642610311508176, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004162919009104371, "signal/frontier_ece_reward/centered_abs_mean": 0.23592609018087388, "signal/frontier_ece_reward/group_bin_occupancy": 0.6890625, "signal/frontier_ece_reward/group_std_mean": 0.2841936469078064, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029490761272609235, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029490761272609235, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24894185066223146, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.319921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35590378642082215, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031117731332778932, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031117731332778932, "step": 15 }, { "calibration/aurc": 0.5341530098263937, "calibration/batch_distribution_entropy": 0.6953781856593181, "calibration/batch_entropy_100bins": 0.5009968781781217, "calibration/batch_entropy_10bins": 0.6953781856593181, "calibration/batch_entropy_50bins": 0.5862452573775565, "calibration/batch_uniqueness": 0.7366723501935544, "calibration/buffer_distribution_entropy": 0.6570677642554361, "calibration/buffer_entropy_100bins": 0.49046116304887866, "calibration/buffer_entropy_10bins": 0.6570677642554361, "calibration/buffer_entropy_50bins": 0.5733484379340844, "calibration/confidence_entropy": 0.36679942254955256, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.37407708422530783, "calibration/mean_confidence": 0.7776747983528602, "calibration/prompt_uniqueness": 0.6546207061865514, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005078125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1265.4, "completions/mean_length": 143.1140625, "completions/mean_terminated_length": 136.00784912109376, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.064, "grad_norm": 0.004953299183398485, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 68821804.0, "reward": 0.6139556050300599, "reward_std": 0.2074872225522995, "rewards/accuracy_reward": 0.339453125, "rewards/brier_reward": 0.5684828639030457, "rewards/format_reward": 0.97431640625, "rewards/frontier_aurc_reward": -0.006926297210156918, "rewards/frontier_coverage_1": 0.06462682336568833, "rewards/frontier_coverage_10": 0.06462682336568833, "rewards/frontier_coverage_15": 0.06462682336568833, "rewards/frontier_coverage_20": 0.06462682336568833, "rewards/frontier_coverage_25": 0.06462682336568833, "rewards/frontier_coverage_5": 0.06462682336568833, "rewards/frontier_ece_reward": -0.057490382343530655, "rewards/frontier_entropy_batch_reward": -0.9089613318443298, "signal/accuracy_reward/centered_abs_mean": 0.19891357421875, "signal/accuracy_reward/group_bin_occupancy": 0.206640625, "signal/accuracy_reward/group_std_mean": 0.24994130730628966, "signal/accuracy_reward/group_zero_std_frac": 0.346875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099456787109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.099456787109375, "signal/advantage_abs_mean": 0.1587139695882797, "signal/advantage_pre_scale_abs_mean": 0.1587139695882797, "signal/advantage_pre_scale_std": 0.22378909289836885, "signal/advantage_std": 0.22378909289836885, "signal/brier_reward/centered_abs_mean": 0.24612878561019896, "signal/brier_reward/group_bin_occupancy": 0.83125, "signal/brier_reward/group_std_mean": 0.30340049862861634, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03076609820127487, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03076609820127487, "signal/format_reward/centered_abs_mean": 0.048358154296875, "signal/format_reward/group_bin_occupancy": 0.19296875, "signal/format_reward/group_std_mean": 0.11387113332748414, "signal/format_reward/group_zero_std_frac": 0.45625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0241790771484375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0241790771484375, "signal/frontier_aurc_reward/centered_abs_mean": 0.005050728749483824, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74140625, "signal/frontier_aurc_reward/group_std_mean": 0.006901584379374981, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.040803997777403e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.040803997777403e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1083696573972702, "signal/frontier_coverage_1/group_bin_occupancy": 0.669921875, "signal/frontier_coverage_1/group_std_mean": 0.17034714818000793, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_10/centered_abs_mean": 0.1083696573972702, "signal/frontier_coverage_10/group_bin_occupancy": 0.669921875, "signal/frontier_coverage_10/group_std_mean": 0.17034714818000793, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_15/centered_abs_mean": 0.1083696573972702, "signal/frontier_coverage_15/group_bin_occupancy": 0.669921875, "signal/frontier_coverage_15/group_std_mean": 0.17034714818000793, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_20/centered_abs_mean": 0.1083696573972702, "signal/frontier_coverage_20/group_bin_occupancy": 0.669921875, "signal/frontier_coverage_20/group_std_mean": 0.17034714818000793, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_25/centered_abs_mean": 0.1083696573972702, "signal/frontier_coverage_25/group_bin_occupancy": 0.669921875, "signal/frontier_coverage_25/group_std_mean": 0.17034714818000793, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_5/centered_abs_mean": 0.1083696573972702, "signal/frontier_coverage_5/group_bin_occupancy": 0.669921875, "signal/frontier_coverage_5/group_std_mean": 0.17034714818000793, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019398168195039033, "signal/frontier_ece_reward/centered_abs_mean": 0.12835136353969573, "signal/frontier_ece_reward/group_bin_occupancy": 0.6828125, "signal/frontier_ece_reward/group_std_mean": 0.1587873101234436, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.016043920442461966, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.016043920442461966, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1584494709968567, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.27752745449543, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019806183874607086, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019806183874607086, "step": 20 }, { "calibration/aurc": 0.6123685825831842, "calibration/batch_distribution_entropy": 0.7943765238910869, "calibration/batch_entropy_100bins": 0.5589816454054507, "calibration/batch_entropy_10bins": 0.7943765238910869, "calibration/batch_entropy_50bins": 0.6484498837772973, "calibration/batch_uniqueness": 0.8013406362144699, "calibration/buffer_distribution_entropy": 0.6846900609897967, "calibration/buffer_entropy_100bins": 0.5049285356703311, "calibration/buffer_entropy_10bins": 0.6846900609897967, "calibration/buffer_entropy_50bins": 0.5890832546755681, "calibration/confidence_entropy": 0.4306898105190983, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.40301083672013965, "calibration/mean_confidence": 0.7135340481523055, "calibration/prompt_uniqueness": 0.7293804507629489, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00224609375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1015.4, "completions/mean_length": 125.69091796875, "completions/mean_terminated_length": 122.5162124633789, "completions/min_length": 38.2, "completions/min_terminated_length": 38.2, "epoch": 0.08, "grad_norm": 0.01374911330640316, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 85042031.0, "reward": 0.6419196009635926, "reward_std": 0.18055281639099122, "rewards/accuracy_reward": 0.35390625, "rewards/brier_reward": 0.6228940486907959, "rewards/format_reward": 0.9931640625, "rewards/frontier_aurc_reward": -0.005947516486048699, "rewards/frontier_coverage_1": 0.07641823142766953, "rewards/frontier_coverage_10": 0.07641823142766953, "rewards/frontier_coverage_15": 0.07641823142766953, "rewards/frontier_coverage_20": 0.07641823142766953, "rewards/frontier_coverage_25": 0.07641823142766953, "rewards/frontier_coverage_5": 0.07641823142766953, "rewards/frontier_ece_reward": -0.0432446762919426, "rewards/frontier_entropy_batch_reward": -0.8973807573318482, "signal/accuracy_reward/centered_abs_mean": 0.18575439453125, "signal/accuracy_reward/group_bin_occupancy": 0.205078125, "signal/accuracy_reward/group_std_mean": 0.23675628304481505, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092877197265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.092877197265625, "signal/advantage_abs_mean": 0.13976745307445526, "signal/advantage_pre_scale_abs_mean": 0.13976745307445526, "signal/advantage_pre_scale_std": 0.1967965304851532, "signal/advantage_std": 0.1967965304851532, "signal/brier_reward/centered_abs_mean": 0.22795205116271972, "signal/brier_reward/group_bin_occupancy": 0.8578125, "signal/brier_reward/group_std_mean": 0.28223063349723815, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028494006395339964, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.028494006395339964, "signal/format_reward/centered_abs_mean": 0.01312255859375, "signal/format_reward/group_bin_occupancy": 0.148828125, "signal/format_reward/group_std_mean": 0.03558391332626343, "signal/format_reward/group_zero_std_frac": 0.809375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0038825439289212225, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625, "signal/frontier_aurc_reward/group_std_mean": 0.005549946706742049, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.949753442313522e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.949753442313522e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14139218479394913, "signal/frontier_coverage_1/group_bin_occupancy": 0.740625, "signal/frontier_coverage_1/group_std_mean": 0.2093652755022049, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_10/centered_abs_mean": 0.14139218479394913, "signal/frontier_coverage_10/group_bin_occupancy": 0.740625, "signal/frontier_coverage_10/group_std_mean": 0.2093652755022049, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_15/centered_abs_mean": 0.14139218479394913, "signal/frontier_coverage_15/group_bin_occupancy": 0.740625, "signal/frontier_coverage_15/group_std_mean": 0.2093652755022049, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_20/centered_abs_mean": 0.14139218479394913, "signal/frontier_coverage_20/group_bin_occupancy": 0.740625, "signal/frontier_coverage_20/group_std_mean": 0.2093652755022049, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_25/centered_abs_mean": 0.14139218479394913, "signal/frontier_coverage_25/group_bin_occupancy": 0.740625, "signal/frontier_coverage_25/group_std_mean": 0.2093652755022049, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_5/centered_abs_mean": 0.14139218479394913, "signal/frontier_coverage_5/group_bin_occupancy": 0.740625, "signal/frontier_coverage_5/group_std_mean": 0.2093652755022049, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025309198535978794, "signal/frontier_ece_reward/centered_abs_mean": 0.1143453910946846, "signal/frontier_ece_reward/group_bin_occupancy": 0.75546875, "signal/frontier_ece_reward/group_std_mean": 0.1409228652715683, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014293173886835575, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014293173886835575, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1770955890417099, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3114177048206329, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02213694863021374, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02213694863021374, "step": 25 }, { "calibration/aurc": 0.6187899429468231, "calibration/batch_distribution_entropy": 0.903264681119443, "calibration/batch_entropy_100bins": 0.6551592063501281, "calibration/batch_entropy_10bins": 0.903264681119443, "calibration/batch_entropy_50bins": 0.7520692317813698, "calibration/batch_uniqueness": 0.8607905341900063, "calibration/buffer_distribution_entropy": 0.7327899214780806, "calibration/buffer_entropy_100bins": 0.5348155537313921, "calibration/buffer_entropy_10bins": 0.7327899214780806, "calibration/buffer_entropy_50bins": 0.6214034080313888, "calibration/confidence_entropy": 0.4955790028408361, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.03137254901960784, "calibration/coverage@5%": 0.0, "calibration/ece": 0.29642104394376234, "calibration/mean_confidence": 0.585974183555353, "calibration/prompt_uniqueness": 0.7915110704181842, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 1536.0, "completions/max_terminated_length": 762.6, "completions/mean_length": 124.0724609375, "completions/mean_terminated_length": 122.8305679321289, "completions/min_length": 37.2, "completions/min_terminated_length": 37.2, "epoch": 0.096, "grad_norm": 0.004074463155120611, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 101357141.0, "reward": 0.6705445170402526, "reward_std": 0.16784824728965758, "rewards/accuracy_reward": 0.36572265625, "rewards/brier_reward": 0.6802791714668274, "rewards/format_reward": 0.9966796875, "rewards/frontier_aurc_reward": -0.005298045370727778, "rewards/frontier_coverage_1": 0.10815906524658203, "rewards/frontier_coverage_10": 0.10815906524658203, "rewards/frontier_coverage_15": 0.10815906524658203, "rewards/frontier_coverage_20": 0.10815906524658203, "rewards/frontier_coverage_25": 0.10815906524658203, "rewards/frontier_coverage_5": 0.10815906524658203, "rewards/frontier_ece_reward": -0.02196125448681414, "rewards/frontier_entropy_batch_reward": -0.8357429265975952, "signal/accuracy_reward/centered_abs_mean": 0.182061767578125, "signal/accuracy_reward/group_bin_occupancy": 0.204296875, "signal/accuracy_reward/group_std_mean": 0.23303503692150115, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0910308837890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0910308837890625, "signal/advantage_abs_mean": 0.13158320784568786, "signal/advantage_pre_scale_abs_mean": 0.13158320784568786, "signal/advantage_pre_scale_std": 0.18012692034244537, "signal/advantage_std": 0.18012692034244537, "signal/brier_reward/centered_abs_mean": 0.22275688350200654, "signal/brier_reward/group_bin_occupancy": 0.885546875, "signal/brier_reward/group_std_mean": 0.27452688217163085, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027844610437750817, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.027844610437750817, "signal/format_reward/centered_abs_mean": 0.00640869140625, "signal/format_reward/group_bin_occupancy": 0.1375, "signal/format_reward/group_std_mean": 0.018109906651079654, "signal/format_reward/group_zero_std_frac": 0.9, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003204345703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003204345703125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029932684265077114, "signal/frontier_aurc_reward/group_bin_occupancy": 0.705078125, "signal/frontier_aurc_reward/group_std_mean": 0.004513154737651348, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.357950285542756e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.357950285542756e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.207365944981575, "signal/frontier_coverage_1/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_1/group_std_mean": 0.27984519600868224, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_10/centered_abs_mean": 0.207365944981575, "signal/frontier_coverage_10/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_10/group_std_mean": 0.27984519600868224, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_15/centered_abs_mean": 0.207365944981575, "signal/frontier_coverage_15/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_15/group_std_mean": 0.27984519600868224, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_20/centered_abs_mean": 0.207365944981575, "signal/frontier_coverage_20/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_20/group_std_mean": 0.27984519600868224, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_25/centered_abs_mean": 0.207365944981575, "signal/frontier_coverage_25/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_25/group_std_mean": 0.27984519600868224, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_5/centered_abs_mean": 0.207365944981575, "signal/frontier_coverage_5/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_5/group_std_mean": 0.27984519600868224, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037118502892553805, "signal/frontier_ece_reward/centered_abs_mean": 0.10284390598535538, "signal/frontier_ece_reward/group_bin_occupancy": 0.791796875, "signal/frontier_ece_reward/group_std_mean": 0.12563495337963104, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012855488248169422, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012855488248169422, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2653192490339279, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.4625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40386409759521485, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03316490612924099, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03316490612924099, "step": 30 }, { "calibration/aurc": 0.46203742605027587, "calibration/batch_distribution_entropy": 0.9505007499638772, "calibration/batch_entropy_100bins": 0.8425976231283391, "calibration/batch_entropy_10bins": 0.9505007499638772, "calibration/batch_entropy_50bins": 0.8942506391359825, "calibration/batch_uniqueness": 0.9259175330557399, "calibration/buffer_distribution_entropy": 0.8045500051104121, "calibration/buffer_entropy_100bins": 0.5969997504126264, "calibration/buffer_entropy_10bins": 0.8045500051104121, "calibration/buffer_entropy_50bins": 0.6849823069476149, "calibration/confidence_entropy": 0.49676340907411404, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.01875, "calibration/coverage@20%": 0.026953125, "calibration/coverage@25%": 0.057421875, "calibration/coverage@30%": 0.059375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1438197834007442, "calibration/mean_confidence": 0.4231594765776811, "calibration/prompt_uniqueness": 0.8550597826970604, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1096.2, "completions/max_terminated_length": 672.8, "completions/mean_length": 126.68193359375, "completions/mean_terminated_length": 125.71805114746094, "completions/min_length": 37.6, "completions/min_terminated_length": 37.6, "epoch": 0.112, "grad_norm": 0.00284270360134542, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 117763836.0, "reward": 0.7385032176971436, "reward_std": 0.1466519594192505, "rewards/accuracy_reward": 0.39775390625, "rewards/brier_reward": 0.7357542634010314, "rewards/format_reward": 0.99775390625, "rewards/frontier_aurc_reward": -0.004414942674338817, "rewards/frontier_coverage_1": 0.14376911520957947, "rewards/frontier_coverage_10": 0.14376911520957947, "rewards/frontier_coverage_15": 0.14376911520957947, "rewards/frontier_coverage_20": 0.14376911520957947, "rewards/frontier_coverage_25": 0.14376911520957947, "rewards/frontier_coverage_5": 0.14376911520957947, "rewards/frontier_ece_reward": 0.004067628551274538, "rewards/frontier_entropy_batch_reward": -0.5367215931415558, "signal/accuracy_reward/centered_abs_mean": 0.178826904296875, "signal/accuracy_reward/group_bin_occupancy": 0.203125, "signal/accuracy_reward/group_std_mean": 0.22884972691535949, "signal/accuracy_reward/group_zero_std_frac": 0.375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0894134521484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0894134521484375, "signal/advantage_abs_mean": 0.11495690047740936, "signal/advantage_pre_scale_abs_mean": 0.11495690047740936, "signal/advantage_pre_scale_std": 0.15609990060329437, "signal/advantage_std": 0.15609990060329437, "signal/brier_reward/centered_abs_mean": 0.19914465844631196, "signal/brier_reward/group_bin_occupancy": 0.871484375, "signal/brier_reward/group_std_mean": 0.25038520991802216, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024893082305788995, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024893082305788995, "signal/format_reward/centered_abs_mean": 0.004351806640625, "signal/format_reward/group_bin_occupancy": 0.133984375, "signal/format_reward/group_std_mean": 0.012705824710428715, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021759033203125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125, "signal/frontier_aurc_reward/centered_abs_mean": 0.00161303433123976, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7171875, "signal/frontier_aurc_reward/group_std_mean": 0.002583282254636288, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8873312840005382e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8873312840005382e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.28870871663093567, "signal/frontier_coverage_1/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_1/group_std_mean": 0.36129642128944395, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_10/centered_abs_mean": 0.28870871663093567, "signal/frontier_coverage_10/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_10/group_std_mean": 0.36129642128944395, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_15/centered_abs_mean": 0.28870871663093567, "signal/frontier_coverage_15/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_15/group_std_mean": 0.36129642128944395, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_20/centered_abs_mean": 0.28870871663093567, "signal/frontier_coverage_20/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_20/group_std_mean": 0.36129642128944395, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_25/centered_abs_mean": 0.28870871663093567, "signal/frontier_coverage_25/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_25/group_std_mean": 0.36129642128944395, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_5/centered_abs_mean": 0.28870871663093567, "signal/frontier_coverage_5/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_5/group_std_mean": 0.36129642128944395, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00516788586974144, "signal/frontier_ece_reward/centered_abs_mean": 0.06561752930283546, "signal/frontier_ece_reward/group_bin_occupancy": 0.724609375, "signal/frontier_ece_reward/group_std_mean": 0.08880508691072464, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008202191162854432, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008202191162854432, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.455366712808609, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.688671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5278913855552674, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05692083910107613, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05692083910107613, "step": 35 }, { "calibration/aurc": 0.5524540681481819, "calibration/batch_distribution_entropy": 0.8926553482625657, "calibration/batch_entropy_100bins": 0.9168440833945379, "calibration/batch_entropy_10bins": 0.8926553482625657, "calibration/batch_entropy_50bins": 0.920951322294291, "calibration/batch_uniqueness": 0.9371887328581348, "calibration/buffer_distribution_entropy": 0.8752040807438071, "calibration/buffer_entropy_100bins": 0.6906934782719981, "calibration/buffer_entropy_10bins": 0.8752040807438071, "calibration/buffer_entropy_50bins": 0.7684556478443987, "calibration/confidence_entropy": 0.47675208932497054, "calibration/coverage@0%": 0.003919266336671655, "calibration/coverage@1%": 0.003919266336671655, "calibration/coverage@10%": 0.006664364375887341, "calibration/coverage@15%": 0.007840834964122636, "calibration/coverage@20%": 0.008232991826867733, "calibration/coverage@25%": 0.018820459690725606, "calibration/coverage@30%": 0.027445608380338438, "calibration/coverage@5%": 0.003919266336671655, "calibration/ece": 0.1811672126323914, "calibration/mean_confidence": 0.3110680805757154, "calibration/prompt_uniqueness": 0.8780275469888137, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 1536.0, "completions/max_terminated_length": 799.4, "completions/mean_length": 121.43369140625, "completions/mean_terminated_length": 119.91219024658203, "completions/min_length": 40.6, "completions/min_terminated_length": 40.6, "epoch": 0.128, "grad_norm": 0.0024108977522701025, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 133923989.0, "reward": 0.7399052381515503, "reward_std": 0.12542397379875184, "rewards/accuracy_reward": 0.3775390625, "rewards/brier_reward": 0.7327269196510315, "rewards/format_reward": 0.996875, "rewards/frontier_aurc_reward": -0.004320676997303962, "rewards/frontier_coverage_1": 0.1661964625120163, "rewards/frontier_coverage_10": 0.1661964625120163, "rewards/frontier_coverage_15": 0.1661964625120163, "rewards/frontier_coverage_20": 0.1661964625120163, "rewards/frontier_coverage_25": 0.1661964625120163, "rewards/frontier_coverage_5": 0.1661964625120163, "rewards/frontier_ece_reward": 0.00360437398776412, "rewards/frontier_entropy_batch_reward": -0.4569231688976288, "signal/accuracy_reward/centered_abs_mean": 0.1685546875, "signal/accuracy_reward/group_bin_occupancy": 0.1984375, "signal/accuracy_reward/group_std_mean": 0.21551733016967772, "signal/accuracy_reward/group_zero_std_frac": 0.4125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08427734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08427734375, "signal/advantage_abs_mean": 0.09791394621133805, "signal/advantage_pre_scale_abs_mean": 0.09791394621133805, "signal/advantage_pre_scale_std": 0.13817036151885986, "signal/advantage_std": 0.13817036151885986, "signal/brier_reward/centered_abs_mean": 0.1911756455898285, "signal/brier_reward/group_bin_occupancy": 0.863671875, "signal/brier_reward/group_std_mean": 0.24225885570049285, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023896955698728562, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023896955698728562, "signal/format_reward/centered_abs_mean": 0.0059814453125, "signal/format_reward/group_bin_occupancy": 0.135546875, "signal/format_reward/group_std_mean": 0.015936914831399918, "signal/format_reward/group_zero_std_frac": 0.915625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00299072265625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00299072265625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012180484831333161, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71640625, "signal/frontier_aurc_reward/group_std_mean": 0.001990600279532373, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.180306655645836e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.180306655645836e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.3146185576915741, "signal/frontier_coverage_1/group_bin_occupancy": 0.925, "signal/frontier_coverage_1/group_std_mean": 0.3889928042888641, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_10/centered_abs_mean": 0.3146185576915741, "signal/frontier_coverage_10/group_bin_occupancy": 0.925, "signal/frontier_coverage_10/group_std_mean": 0.3889928042888641, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_15/centered_abs_mean": 0.3146185576915741, "signal/frontier_coverage_15/group_bin_occupancy": 0.925, "signal/frontier_coverage_15/group_std_mean": 0.3889928042888641, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_20/centered_abs_mean": 0.3146185576915741, "signal/frontier_coverage_20/group_bin_occupancy": 0.925, "signal/frontier_coverage_20/group_std_mean": 0.3889928042888641, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_25/centered_abs_mean": 0.3146185576915741, "signal/frontier_coverage_25/group_bin_occupancy": 0.925, "signal/frontier_coverage_25/group_std_mean": 0.3889928042888641, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_5/centered_abs_mean": 0.3146185576915741, "signal/frontier_coverage_5/group_bin_occupancy": 0.925, "signal/frontier_coverage_5/group_std_mean": 0.3889928042888641, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005631672125309706, "signal/frontier_ece_reward/centered_abs_mean": 0.0448478564620018, "signal/frontier_ece_reward/group_bin_occupancy": 0.6765625, "signal/frontier_ece_reward/group_std_mean": 0.06723327487707138, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005605982057750225, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005605982057750225, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44531151056289675, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5023255228996277, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.055663938820362094, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.055663938820362094, "step": 40 }, { "calibration/aurc": 0.3908736488625225, "calibration/batch_distribution_entropy": 0.952870268039504, "calibration/batch_entropy_100bins": 0.9511110787504482, "calibration/batch_entropy_10bins": 0.952870268039504, "calibration/batch_entropy_50bins": 0.9596622843607211, "calibration/batch_uniqueness": 0.9485456533751936, "calibration/buffer_distribution_entropy": 0.9173490790989061, "calibration/buffer_entropy_100bins": 0.7607477388436942, "calibration/buffer_entropy_10bins": 0.9173490790989061, "calibration/buffer_entropy_50bins": 0.8259944052519161, "calibration/confidence_entropy": 0.5360820790331252, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.025145045432220035, "calibration/coverage@15%": 0.07660240667976424, "calibration/coverage@20%": 0.12021733791748526, "calibration/coverage@25%": 0.19523871778573904, "calibration/coverage@30%": 0.23445159313725492, "calibration/coverage@5%": 0.000390625, "calibration/ece": 0.21712174109019186, "calibration/mean_confidence": 0.4122595098549266, "calibration/prompt_uniqueness": 0.8860017234651405, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 1536.0, "completions/max_terminated_length": 854.2, "completions/mean_length": 137.2712890625, "completions/mean_terminated_length": 135.76856079101563, "completions/min_length": 45.2, "completions/min_terminated_length": 45.2, "epoch": 0.144, "grad_norm": 0.0018301416421309114, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 150280079.0, "reward": 0.8004841685295105, "reward_std": 0.1398836553096771, "rewards/accuracy_reward": 0.48779296875, "rewards/brier_reward": 0.7173955202102661, "rewards/format_reward": 0.99775390625, "rewards/frontier_aurc_reward": -0.003834694530814886, "rewards/frontier_coverage_1": 0.0504607018083334, "rewards/frontier_coverage_10": 0.0504607018083334, "rewards/frontier_coverage_15": 0.0504607018083334, "rewards/frontier_coverage_20": 0.0504607018083334, "rewards/frontier_coverage_25": 0.0504607018083334, "rewards/frontier_coverage_5": 0.0504607018083334, "rewards/frontier_ece_reward": 0.009193889400921761, "rewards/frontier_entropy_batch_reward": -0.30771028995513916, "signal/accuracy_reward/centered_abs_mean": 0.172930908203125, "signal/accuracy_reward/group_bin_occupancy": 0.20703125, "signal/accuracy_reward/group_std_mean": 0.22917729318141938, "signal/accuracy_reward/group_zero_std_frac": 0.34375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0864654541015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0864654541015625, "signal/advantage_abs_mean": 0.1103538304567337, "signal/advantage_pre_scale_abs_mean": 0.1103538304567337, "signal/advantage_pre_scale_std": 0.14888681769371032, "signal/advantage_std": 0.14888681769371032, "signal/brier_reward/centered_abs_mean": 0.19215757250785828, "signal/brier_reward/group_bin_occupancy": 0.915234375, "signal/brier_reward/group_std_mean": 0.24102371633052827, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024019696563482285, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024019696563482285, "signal/format_reward/centered_abs_mean": 0.004351806640625, "signal/format_reward/group_bin_occupancy": 0.133984375, "signal/format_reward/group_std_mean": 0.012705824431031942, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021759033203125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016577748814597727, "signal/frontier_aurc_reward/group_bin_occupancy": 0.787109375, "signal/frontier_aurc_reward/group_std_mean": 0.002490155445411801, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9674168763449417e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9674168763449417e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2725175261497498, "signal/frontier_coverage_1/group_bin_occupancy": 0.9375, "signal/frontier_coverage_1/group_std_mean": 0.3421742796897888, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_10/centered_abs_mean": 0.2725175261497498, "signal/frontier_coverage_10/group_bin_occupancy": 0.9375, "signal/frontier_coverage_10/group_std_mean": 0.3421742796897888, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_15/centered_abs_mean": 0.2725175261497498, "signal/frontier_coverage_15/group_bin_occupancy": 0.9375, "signal/frontier_coverage_15/group_std_mean": 0.3421742796897888, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_20/centered_abs_mean": 0.2725175261497498, "signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "signal/frontier_coverage_20/group_std_mean": 0.3421742796897888, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_25/centered_abs_mean": 0.2725175261497498, "signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "signal/frontier_coverage_25/group_std_mean": 0.3421742796897888, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_5/centered_abs_mean": 0.2725175261497498, "signal/frontier_coverage_5/group_bin_occupancy": 0.9375, "signal/frontier_coverage_5/group_std_mean": 0.3421742796897888, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004878063499927521, "signal/frontier_ece_reward/centered_abs_mean": 0.05783376470208168, "signal/frontier_ece_reward/group_bin_occupancy": 0.755078125, "signal/frontier_ece_reward/group_std_mean": 0.07939041703939438, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00722922058776021, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00722922058776021, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37824747562408445, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.766015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44698449969291687, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.047280934453010556, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.047280934453010556, "step": 45 }, { "calibration/aurc": 0.44512130583689513, "calibration/batch_distribution_entropy": 0.9777809946831368, "calibration/batch_entropy_100bins": 0.9621848460602968, "calibration/batch_entropy_10bins": 0.9777809946831368, "calibration/batch_entropy_50bins": 0.9734038075969862, "calibration/batch_uniqueness": 0.9527174185369095, "calibration/buffer_distribution_entropy": 0.9389729288749795, "calibration/buffer_entropy_100bins": 0.8098340055908284, "calibration/buffer_entropy_10bins": 0.9389729288749795, "calibration/buffer_entropy_50bins": 0.8648510922973776, "calibration/confidence_entropy": 0.5341879274317181, "calibration/coverage@0%": 0.0007827788649706457, "calibration/coverage@1%": 0.0007827788649706457, "calibration/coverage@10%": 0.0007827788649706457, "calibration/coverage@15%": 0.0007827788649706457, "calibration/coverage@20%": 0.0136986301369863, "calibration/coverage@25%": 0.026996697651663404, "calibration/coverage@30%": 0.06065236668297456, "calibration/coverage@5%": 0.0007827788649706457, "calibration/ece": 0.12611413936588028, "calibration/mean_confidence": 0.5199908538561552, "calibration/prompt_uniqueness": 0.8914967420818158, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1256.2, "completions/max_terminated_length": 726.6, "completions/mean_length": 151.9931640625, "completions/mean_terminated_length": 151.31725463867187, "completions/min_length": 54.6, "completions/min_terminated_length": 54.6, "epoch": 0.16, "grad_norm": 0.002915080636739731, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 166857417.0, "reward": 0.7953056693077087, "reward_std": 0.1445574551820755, "rewards/accuracy_reward": 0.444921875, "rewards/brier_reward": 0.7283108472824097, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.004147487320005893, "rewards/frontier_coverage_1": 0.08064137399196625, "rewards/frontier_coverage_10": 0.08064137399196625, "rewards/frontier_coverage_15": 0.08064137399196625, "rewards/frontier_coverage_20": 0.08064137399196625, "rewards/frontier_coverage_25": 0.08064137399196625, "rewards/frontier_coverage_5": 0.08064137399196625, "rewards/frontier_ece_reward": 0.009373257122933864, "rewards/frontier_entropy_batch_reward": -0.2193224996328354, "signal/accuracy_reward/centered_abs_mean": 0.1618408203125, "signal/accuracy_reward/group_bin_occupancy": 0.1984375, "signal/accuracy_reward/group_std_mean": 0.20846819281578063, "signal/accuracy_reward/group_zero_std_frac": 0.4125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08092041015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08092041015625, "signal/advantage_abs_mean": 0.1161547839641571, "signal/advantage_pre_scale_abs_mean": 0.1161547839641571, "signal/advantage_pre_scale_std": 0.15687225759029388, "signal/advantage_std": 0.15687225759029388, "signal/brier_reward/centered_abs_mean": 0.19412323236465454, "signal/brier_reward/group_bin_occupancy": 0.912109375, "signal/brier_reward/group_std_mean": 0.24180429577827453, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024265404045581817, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024265404045581817, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_bin_occupancy": 0.129296875, "signal/format_reward/group_std_mean": 0.006076698750257492, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002387148514389992, "signal/frontier_aurc_reward/group_bin_occupancy": 0.79453125, "signal/frontier_aurc_reward/group_std_mean": 0.0034649877808988093, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2729955748654905e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2729955748654905e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22005172967910766, "signal/frontier_coverage_1/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_1/group_std_mean": 0.28260610103607176, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_10/centered_abs_mean": 0.22005172967910766, "signal/frontier_coverage_10/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_10/group_std_mean": 0.28260610103607176, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_15/centered_abs_mean": 0.22005172967910766, "signal/frontier_coverage_15/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_15/group_std_mean": 0.28260610103607176, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_20/centered_abs_mean": 0.22005172967910766, "signal/frontier_coverage_20/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_20/group_std_mean": 0.28260610103607176, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_25/centered_abs_mean": 0.22005172967910766, "signal/frontier_coverage_25/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_25/group_std_mean": 0.28260610103607176, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_5/centered_abs_mean": 0.22005172967910766, "signal/frontier_coverage_5/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_5/group_std_mean": 0.28260610103607176, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003938925778493285, "signal/frontier_ece_reward/centered_abs_mean": 0.07324738055467606, "signal/frontier_ece_reward/group_bin_occupancy": 0.7953125, "signal/frontier_ece_reward/group_std_mean": 0.09497761726379395, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009155922569334507, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009155922569334507, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.310670405626297, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.757421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3890035688877106, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038833800703287125, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038833800703287125, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.64462876440125, "eval_calibration/batch_distribution_entropy": 0.9172303138520053, "eval_calibration/batch_entropy_100bins": 0.6999477876337099, "eval_calibration/batch_entropy_10bins": 0.9172303138520053, "eval_calibration/batch_entropy_50bins": 0.7876103883743395, "eval_calibration/batch_uniqueness": 0.892578125, "eval_calibration/buffer_distribution_entropy": 0.9464992351760915, "eval_calibration/buffer_entropy_100bins": 0.8332314601674196, "eval_calibration/buffer_entropy_10bins": 0.9464992351760915, "eval_calibration/buffer_entropy_50bins": 0.882575922510453, "eval_calibration/confidence_entropy": 0.5330085179766757, "eval_calibration/coverage@0%": 0.0078125, "eval_calibration/coverage@1%": 0.0078125, "eval_calibration/coverage@10%": 0.0078125, "eval_calibration/coverage@15%": 0.0078125, "eval_calibration/coverage@20%": 0.0078125, "eval_calibration/coverage@25%": 0.0078125, "eval_calibration/coverage@30%": 0.0078125, "eval_calibration/coverage@5%": 0.0078125, "eval_calibration/ece": 0.3476830269533962, "eval_calibration/mean_confidence": 0.5751843204816087, "eval_calibration/prompt_uniqueness": 0.892578125, "eval_completions/clipped_ratio": 0.002155172413793094, "eval_completions/max_length": 638.5, "eval_completions/max_terminated_length": 338.25, "eval_completions/mean_length": 167.50525283813477, "eval_completions/mean_terminated_length": 164.54486846923828, "eval_completions/min_length": 69.5, "eval_completions/min_terminated_length": 69.5, "eval_loss": 0.0, "eval_num_tokens": 166857417.0, "eval_reward": 0.6432019472122192, "eval_reward_std": 0.24113430455327034, "eval_rewards/accuracy_reward": 0.345703125, "eval_rewards/brier_reward": 0.6860938370227814, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.00537203811109066, "eval_rewards/frontier_coverage_1": 0.11216729879379272, "eval_rewards/frontier_coverage_10": 0.11216729879379272, "eval_rewards/frontier_coverage_15": 0.11216729879379272, "eval_rewards/frontier_coverage_20": 0.11216729879379272, "eval_rewards/frontier_coverage_25": 0.11216729879379272, "eval_rewards/frontier_coverage_5": 0.11216729879379272, "eval_rewards/frontier_ece_reward": -0.01303649484179914, "eval_rewards/frontier_entropy_batch_reward": -0.998046875, "eval_runtime": 27.8074, "eval_samples_per_second": 17.981, "eval_signal/accuracy_reward/centered_abs_mean": 0.4376220703125, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4738186076283455, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21881103515625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21881103515625, "eval_signal/advantage_abs_mean": 0.21178173646330833, "eval_signal/advantage_pre_scale_abs_mean": 0.21178173646330833, "eval_signal/advantage_pre_scale_std": 0.23889374360442162, "eval_signal/advantage_std": 0.23889374360442162, "eval_signal/brier_reward/centered_abs_mean": 0.23696352913975716, "eval_signal/brier_reward/group_bin_occupancy": 0.9375, "eval_signal/brier_reward/group_std_mean": 0.28708796203136444, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029620441142469645, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029620441142469645, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_bin_occupancy": 0.1328125, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003871684370096773, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.859375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005378421046771109, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.930314702913165e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.930314702913165e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.26474981755018234, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_1/group_std_mean": 0.3581102788448334, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.26474981755018234, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_10/group_std_mean": 0.3581102788448334, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.26474981755018234, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_15/group_std_mean": 0.3581102788448334, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.26474981755018234, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.3581102788448334, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.26474981755018234, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.3581102788448334, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.26474981755018234, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_5/group_std_mean": 0.3581102788448334, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004739021649584174, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.08822193928062916, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8515625, "eval_signal/frontier_ece_reward/group_std_mean": 0.12208670750260353, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011027742410078645, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011027742410078645, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1328125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0004730224609375, "eval_steps_per_second": 0.144, "step": 50 }, { "calibration/aurc": 0.4433609500645401, "calibration/batch_distribution_entropy": 0.9786514493906904, "calibration/batch_entropy_100bins": 0.9673268063104924, "calibration/batch_entropy_10bins": 0.9786514493906904, "calibration/batch_entropy_50bins": 0.9765609238602323, "calibration/batch_uniqueness": 0.9542412727001033, "calibration/buffer_distribution_entropy": 0.9496313104637549, "calibration/buffer_entropy_100bins": 0.8458478577532829, "calibration/buffer_entropy_10bins": 0.9496313104637549, "calibration/buffer_entropy_50bins": 0.8919244765075639, "calibration/confidence_entropy": 0.5027312367502492, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.00078125, "calibration/coverage@20%": 0.00078125, "calibration/coverage@25%": 0.07697150735294118, "calibration/coverage@30%": 0.10665900735294118, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.18526691978044169, "calibration/mean_confidence": 0.5750017950436274, "calibration/prompt_uniqueness": 0.889430057394641, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 1196.4, "completions/max_terminated_length": 714.2, "completions/mean_length": 172.89833984375, "completions/mean_terminated_length": 171.70108032226562, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.176, "grad_norm": 0.0018691306468099356, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 183865016.0, "reward": 0.8038440227508545, "reward_std": 0.14429736733436585, "rewards/accuracy_reward": 0.4546875, "rewards/brier_reward": 0.7278488039970398, "rewards/format_reward": 0.99814453125, "rewards/frontier_aurc_reward": -0.004136397829279303, "rewards/frontier_coverage_1": 0.08787006139755249, "rewards/frontier_coverage_10": 0.08787006139755249, "rewards/frontier_coverage_15": 0.08787006139755249, "rewards/frontier_coverage_20": 0.08787006139755249, "rewards/frontier_coverage_25": 0.08787006139755249, "rewards/frontier_coverage_5": 0.08787006139755249, "rewards/frontier_ece_reward": 0.013153896108269692, "rewards/frontier_entropy_batch_reward": -0.19648434817790986, "signal/accuracy_reward/centered_abs_mean": 0.15616455078125, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.20412348508834838, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.078082275390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.078082275390625, "signal/advantage_abs_mean": 0.11150152832269669, "signal/advantage_pre_scale_abs_mean": 0.11150152832269669, "signal/advantage_pre_scale_std": 0.15599793791770936, "signal/advantage_std": 0.15599793791770936, "signal/brier_reward/centered_abs_mean": 0.1965700715780258, "signal/brier_reward/group_bin_occupancy": 0.896875, "signal/brier_reward/group_std_mean": 0.24470431506633758, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024571258947253226, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024571258947253226, "signal/format_reward/centered_abs_mean": 0.003594970703125, "signal/format_reward/group_bin_occupancy": 0.132421875, "signal/format_reward/group_std_mean": 0.010496115870773792, "signal/format_reward/group_zero_std_frac": 0.940625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017974853515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0017974853515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002715344587340951, "signal/frontier_aurc_reward/group_bin_occupancy": 0.78515625, "signal/frontier_aurc_reward/group_std_mean": 0.003948622290045023, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8604665062157436e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8604665062157436e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2161078006029129, "signal/frontier_coverage_1/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_1/group_std_mean": 0.2793154060840607, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_10/centered_abs_mean": 0.2161078006029129, "signal/frontier_coverage_10/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_10/group_std_mean": 0.2793154060840607, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_15/centered_abs_mean": 0.2161078006029129, "signal/frontier_coverage_15/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_15/group_std_mean": 0.2793154060840607, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_20/centered_abs_mean": 0.2161078006029129, "signal/frontier_coverage_20/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_20/group_std_mean": 0.2793154060840607, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_25/centered_abs_mean": 0.2161078006029129, "signal/frontier_coverage_25/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_25/group_std_mean": 0.2793154060840607, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_5/centered_abs_mean": 0.2161078006029129, "signal/frontier_coverage_5/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_5/group_std_mean": 0.2793154060840607, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038683294784277677, "signal/frontier_ece_reward/centered_abs_mean": 0.07303946614265441, "signal/frontier_ece_reward/group_bin_occupancy": 0.757421875, "signal/frontier_ece_reward/group_std_mean": 0.0940830409526825, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009129933267831802, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009129933267831802, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2854499340057373, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3648853302001953, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035681241750717164, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035681241750717164, "step": 55 }, { "calibration/aurc": 0.3634893904079156, "calibration/batch_distribution_entropy": 0.979375848885876, "calibration/batch_entropy_100bins": 0.9688071121704119, "calibration/batch_entropy_10bins": 0.979375848885876, "calibration/batch_entropy_50bins": 0.9775960671362709, "calibration/batch_uniqueness": 0.9546913035214375, "calibration/buffer_distribution_entropy": 0.9549641064676517, "calibration/buffer_entropy_100bins": 0.870291774493005, "calibration/buffer_entropy_10bins": 0.9549641064676517, "calibration/buffer_entropy_50bins": 0.9096286521366295, "calibration/confidence_entropy": 0.47387102535633635, "calibration/coverage@0%": 0.003125, "calibration/coverage@1%": 0.003125, "calibration/coverage@10%": 0.005078125, "calibration/coverage@15%": 0.008203125, "calibration/coverage@20%": 0.032421875, "calibration/coverage@25%": 0.06640930772994129, "calibration/coverage@30%": 0.33067056017612523, "calibration/coverage@5%": 0.003125, "calibration/ece": 0.12527432286914647, "calibration/mean_confidence": 0.5426209280769687, "calibration/prompt_uniqueness": 0.8840015994894641, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1356.0, "completions/max_terminated_length": 532.8, "completions/mean_length": 186.98125, "completions/mean_terminated_length": 185.92561950683594, "completions/min_length": 73.6, "completions/min_terminated_length": 73.6, "epoch": 0.192, "grad_norm": 0.0012723851250484586, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 200594520.0, "reward": 0.8214977860450745, "reward_std": 0.13184674382209777, "rewards/accuracy_reward": 0.47822265625, "rewards/brier_reward": 0.7468044757843018, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.00359484669752419, "rewards/frontier_coverage_1": 0.1034425899386406, "rewards/frontier_coverage_10": 0.1034425899386406, "rewards/frontier_coverage_15": 0.1034425899386406, "rewards/frontier_coverage_20": 0.1034425899386406, "rewards/frontier_coverage_25": 0.1034425899386406, "rewards/frontier_coverage_5": 0.1034425899386406, "rewards/frontier_ece_reward": 0.022514346055686474, "rewards/frontier_entropy_batch_reward": -0.193902850151062, "signal/accuracy_reward/centered_abs_mean": 0.144403076171875, "signal/accuracy_reward/group_bin_occupancy": 0.19140625, "signal/accuracy_reward/group_std_mean": 0.1877404749393463, "signal/accuracy_reward/group_zero_std_frac": 0.46875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0722015380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0722015380859375, "signal/advantage_abs_mean": 0.10249822586774826, "signal/advantage_pre_scale_abs_mean": 0.10249822586774826, "signal/advantage_pre_scale_std": 0.14616102278232573, "signal/advantage_std": 0.14616102278232573, "signal/brier_reward/centered_abs_mean": 0.1951207399368286, "signal/brier_reward/group_bin_occupancy": 0.887109375, "signal/brier_reward/group_std_mean": 0.24402922093868257, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024390092492103575, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024390092492103575, "signal/format_reward/centered_abs_mean": 0.00225830078125, "signal/format_reward/group_bin_occupancy": 0.129296875, "signal/format_reward/group_std_mean": 0.0062928175088018175, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001129150390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002580239251255989, "signal/frontier_aurc_reward/group_bin_occupancy": 0.78125, "signal/frontier_aurc_reward/group_std_mean": 0.003768660081550479, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.618627863237634e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.618627863237634e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22825982868671418, "signal/frontier_coverage_1/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_1/group_std_mean": 0.2953463554382324, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_10/centered_abs_mean": 0.22825982868671418, "signal/frontier_coverage_10/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_10/group_std_mean": 0.2953463554382324, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_15/centered_abs_mean": 0.22825982868671418, "signal/frontier_coverage_15/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_15/group_std_mean": 0.2953463554382324, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_20/centered_abs_mean": 0.22825982868671418, "signal/frontier_coverage_20/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_20/group_std_mean": 0.2953463554382324, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_25/centered_abs_mean": 0.22825982868671418, "signal/frontier_coverage_25/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_25/group_std_mean": 0.2953463554382324, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_5/centered_abs_mean": 0.22825982868671418, "signal/frontier_coverage_5/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_5/group_std_mean": 0.2953463554382324, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004085850715637207, "signal/frontier_ece_reward/centered_abs_mean": 0.06816109567880631, "signal/frontier_ece_reward/group_bin_occupancy": 0.735546875, "signal/frontier_ece_reward/group_std_mean": 0.08766969740390777, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008520136959850788, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008520136959850788, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28305876851081846, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3631249308586121, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03538234606385231, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03538234606385231, "step": 60 }, { "calibration/aurc": 0.2962042663019113, "calibration/batch_distribution_entropy": 0.9784319986138323, "calibration/batch_entropy_100bins": 0.966199879916589, "calibration/batch_entropy_10bins": 0.9784319986138323, "calibration/batch_entropy_50bins": 0.9744431551149368, "calibration/batch_uniqueness": 0.9526550054551084, "calibration/buffer_distribution_entropy": 0.9601725002568273, "calibration/buffer_entropy_100bins": 0.888484851018059, "calibration/buffer_entropy_10bins": 0.9601725002568273, "calibration/buffer_entropy_50bins": 0.9226145962963195, "calibration/confidence_entropy": 0.47267274675605747, "calibration/coverage@0%": 0.010947437622309198, "calibration/coverage@1%": 0.010947437622309198, "calibration/coverage@10%": 0.12052042563600782, "calibration/coverage@15%": 0.2706022199119374, "calibration/coverage@20%": 0.3628806873776908, "calibration/coverage@25%": 0.4660821306262231, "calibration/coverage@30%": 0.552443126223092, "calibration/coverage@5%": 0.026603014921722113, "calibration/ece": 0.17455930370135442, "calibration/mean_confidence": 0.48808305500723426, "calibration/prompt_uniqueness": 0.8768258975026015, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1138.4, "completions/max_terminated_length": 574.2, "completions/mean_length": 199.851171875, "completions/mean_terminated_length": 199.32947998046876, "completions/min_length": 84.2, "completions/min_terminated_length": 84.2, "epoch": 0.208, "grad_norm": 0.0013960555661469698, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 217673220.0, "reward": 0.8450250387191772, "reward_std": 0.12305467575788498, "rewards/accuracy_reward": 0.524609375, "rewards/brier_reward": 0.7498408198356629, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003026116266846657, "rewards/frontier_coverage_1": 0.07876687720417977, "rewards/frontier_coverage_10": 0.07876687720417977, "rewards/frontier_coverage_15": 0.07876687720417977, "rewards/frontier_coverage_20": 0.07876687720417977, "rewards/frontier_coverage_25": 0.07876687720417977, "rewards/frontier_coverage_5": 0.07876687720417977, "rewards/frontier_ece_reward": 0.025368864834308624, "rewards/frontier_entropy_batch_reward": -0.17717448472976685, "signal/accuracy_reward/centered_abs_mean": 0.13885498046875, "signal/accuracy_reward/group_bin_occupancy": 0.1890625, "signal/accuracy_reward/group_std_mean": 0.18212647438049318, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069427490234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.069427490234375, "signal/advantage_abs_mean": 0.09637551605701447, "signal/advantage_pre_scale_abs_mean": 0.09637551605701447, "signal/advantage_pre_scale_std": 0.1368851602077484, "signal/advantage_std": 0.1368851602077484, "signal/brier_reward/centered_abs_mean": 0.19150430560112, "signal/brier_reward/group_bin_occupancy": 0.884765625, "signal/brier_reward/group_std_mean": 0.23957839012145996, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02393803820014, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02393803820014, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_bin_occupancy": 0.128515625, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021365312393754722, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7796875, "signal/frontier_aurc_reward/group_std_mean": 0.003165799472481012, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.824390878435224e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.824390878435224e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2470179468393326, "signal/frontier_coverage_1/group_bin_occupancy": 0.89609375, "signal/frontier_coverage_1/group_std_mean": 0.31373026967048645, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_10/centered_abs_mean": 0.2470179468393326, "signal/frontier_coverage_10/group_bin_occupancy": 0.89609375, "signal/frontier_coverage_10/group_std_mean": 0.31373026967048645, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_15/centered_abs_mean": 0.2470179468393326, "signal/frontier_coverage_15/group_bin_occupancy": 0.89609375, "signal/frontier_coverage_15/group_std_mean": 0.31373026967048645, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_20/centered_abs_mean": 0.2470179468393326, "signal/frontier_coverage_20/group_bin_occupancy": 0.89609375, "signal/frontier_coverage_20/group_std_mean": 0.31373026967048645, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_25/centered_abs_mean": 0.2470179468393326, "signal/frontier_coverage_25/group_bin_occupancy": 0.89609375, "signal/frontier_coverage_25/group_std_mean": 0.31373026967048645, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_5/centered_abs_mean": 0.2470179468393326, "signal/frontier_coverage_5/group_bin_occupancy": 0.89609375, "signal/frontier_coverage_5/group_std_mean": 0.31373026967048645, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004421621095389128, "signal/frontier_ece_reward/centered_abs_mean": 0.057305699586868285, "signal/frontier_ece_reward/group_bin_occupancy": 0.706640625, "signal/frontier_ece_reward/group_std_mean": 0.0751513734459877, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007163212448358536, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007163212448358536, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2632014513015747, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3395733177661896, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032900181412696836, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032900181412696836, "step": 65 }, { "calibration/aurc": 0.34124345710586457, "calibration/batch_distribution_entropy": 0.9802328569497316, "calibration/batch_entropy_100bins": 0.9677623361060274, "calibration/batch_entropy_10bins": 0.9802328569497316, "calibration/batch_entropy_50bins": 0.9763784251673779, "calibration/batch_uniqueness": 0.9531273263345412, "calibration/buffer_distribution_entropy": 0.9665878977970547, "calibration/buffer_entropy_100bins": 0.903459766372482, "calibration/buffer_entropy_10bins": 0.9665878977970547, "calibration/buffer_entropy_50bins": 0.9337556437523503, "calibration/confidence_entropy": 0.46980353579934553, "calibration/coverage@0%": 0.0015663251602010667, "calibration/coverage@1%": 0.0015663251602010667, "calibration/coverage@10%": 0.06016007516020107, "calibration/coverage@15%": 0.13458331234891216, "calibration/coverage@20%": 0.25736270193008715, "calibration/coverage@25%": 0.37164490881969225, "calibration/coverage@30%": 0.48739008959748287, "calibration/coverage@5%": 0.0015663251602010667, "calibration/ece": 0.17054890157844843, "calibration/mean_confidence": 0.449686680982038, "calibration/prompt_uniqueness": 0.8756768868691467, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 1536.0, "completions/max_terminated_length": 729.8, "completions/mean_length": 209.3384765625, "completions/mean_terminated_length": 208.0431701660156, "completions/min_length": 86.8, "completions/min_terminated_length": 86.8, "epoch": 0.224, "grad_norm": 0.0011869962327182293, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 234970030.0, "reward": 0.8263303160667419, "reward_std": 0.11979700475931168, "rewards/accuracy_reward": 0.4796875, "rewards/brier_reward": 0.7604617238044739, "rewards/format_reward": 0.99853515625, "rewards/frontier_aurc_reward": -0.0031618389301002027, "rewards/frontier_coverage_1": 0.1220865547657013, "rewards/frontier_coverage_10": 0.1220865547657013, "rewards/frontier_coverage_15": 0.1220865547657013, "rewards/frontier_coverage_20": 0.1220865547657013, "rewards/frontier_coverage_25": 0.1220865547657013, "rewards/frontier_coverage_5": 0.1220865547657013, "rewards/frontier_ece_reward": 0.022188258543610572, "rewards/frontier_entropy_batch_reward": -0.1893421322107315, "signal/accuracy_reward/centered_abs_mean": 0.1328857421875, "signal/accuracy_reward/group_bin_occupancy": 0.187890625, "signal/accuracy_reward/group_std_mean": 0.17681021988391876, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06644287109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06644287109375, "signal/advantage_abs_mean": 0.09216942489147187, "signal/advantage_pre_scale_abs_mean": 0.09216942489147187, "signal/advantage_pre_scale_std": 0.13417203724384308, "signal/advantage_std": 0.13417203724384308, "signal/brier_reward/centered_abs_mean": 0.18361150324344636, "signal/brier_reward/group_bin_occupancy": 0.86953125, "signal/brier_reward/group_std_mean": 0.23139148950576782, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022951437905430794, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022951437905430794, "signal/format_reward/centered_abs_mean": 0.002838134765625, "signal/format_reward/group_bin_occupancy": 0.130859375, "signal/format_reward/group_std_mean": 0.008286407357081771, "signal/format_reward/group_zero_std_frac": 0.953125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021024826914072038, "signal/frontier_aurc_reward/group_bin_occupancy": 0.768359375, "signal/frontier_aurc_reward/group_std_mean": 0.003143219882622361, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.763444037758745e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.763444037758745e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2386508047580719, "signal/frontier_coverage_1/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_1/group_std_mean": 0.3026322960853577, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_10/centered_abs_mean": 0.2386508047580719, "signal/frontier_coverage_10/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_10/group_std_mean": 0.3026322960853577, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_15/centered_abs_mean": 0.2386508047580719, "signal/frontier_coverage_15/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_15/group_std_mean": 0.3026322960853577, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_20/centered_abs_mean": 0.2386508047580719, "signal/frontier_coverage_20/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_20/group_std_mean": 0.3026322960853577, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_25/centered_abs_mean": 0.2386508047580719, "signal/frontier_coverage_25/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_25/group_std_mean": 0.3026322960853577, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_5/centered_abs_mean": 0.2386508047580719, "signal/frontier_coverage_5/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_5/group_std_mean": 0.3026322960853577, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00427184933796525, "signal/frontier_ece_reward/centered_abs_mean": 0.05160396620631218, "signal/frontier_ece_reward/group_bin_occupancy": 0.694140625, "signal/frontier_ece_reward/group_std_mean": 0.0683397501707077, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006450495775789022, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006450495775789022, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2719322979450226, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3513746976852417, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033991537243127826, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033991537243127826, "step": 70 }, { "calibration/aurc": 0.3860749528714358, "calibration/batch_distribution_entropy": 0.9809446834041381, "calibration/batch_entropy_100bins": 0.9698001713708365, "calibration/batch_entropy_10bins": 0.9809446834041381, "calibration/batch_entropy_50bins": 0.9782092600318476, "calibration/batch_uniqueness": 0.9534891725132038, "calibration/buffer_distribution_entropy": 0.9715373687156422, "calibration/buffer_entropy_100bins": 0.9155967161839724, "calibration/buffer_entropy_10bins": 0.9715373687156422, "calibration/buffer_entropy_50bins": 0.942549278644391, "calibration/confidence_entropy": 0.49962341310661385, "calibration/coverage@0%": 0.0023452788649706456, "calibration/coverage@1%": 0.0023452788649706456, "calibration/coverage@10%": 0.08906402886497064, "calibration/coverage@15%": 0.15312652886497063, "calibration/coverage@20%": 0.20859527886497062, "calibration/coverage@25%": 0.22343902886497063, "calibration/coverage@30%": 0.32201412671232876, "calibration/coverage@5%": 0.010157778864970646, "calibration/ece": 0.17762837176592952, "calibration/mean_confidence": 0.4960856168122013, "calibration/prompt_uniqueness": 0.8812828738781218, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1358.0, "completions/max_terminated_length": 703.6, "completions/mean_length": 217.19853515625, "completions/mean_terminated_length": 216.16774291992186, "completions/min_length": 96.8, "completions/min_terminated_length": 96.8, "epoch": 0.24, "grad_norm": 0.0012256160844117403, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 252445823.0, "reward": 0.8490519523620605, "reward_std": 0.1284138709306717, "rewards/accuracy_reward": 0.5345703125, "rewards/brier_reward": 0.7593809008598328, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0028817789163440464, "rewards/frontier_coverage_1": 0.07307546683587134, "rewards/frontier_coverage_10": 0.07307546683587134, "rewards/frontier_coverage_15": 0.07307546683587134, "rewards/frontier_coverage_20": 0.07307546683587134, "rewards/frontier_coverage_25": 0.07307546683587134, "rewards/frontier_coverage_5": 0.07307546683587134, "rewards/frontier_ece_reward": 0.023964449018239974, "rewards/frontier_entropy_batch_reward": -0.18767853379249572, "signal/accuracy_reward/centered_abs_mean": 0.14945068359375, "signal/accuracy_reward/group_bin_occupancy": 0.1953125, "signal/accuracy_reward/group_std_mean": 0.19708451330661775, "signal/accuracy_reward/group_zero_std_frac": 0.4375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074725341796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.074725341796875, "signal/advantage_abs_mean": 0.10107299536466599, "signal/advantage_pre_scale_abs_mean": 0.10107299536466599, "signal/advantage_pre_scale_std": 0.14294905364513397, "signal/advantage_std": 0.14294905364513397, "signal/brier_reward/centered_abs_mean": 0.1782033383846283, "signal/brier_reward/group_bin_occupancy": 0.876171875, "signal/brier_reward/group_std_mean": 0.22389057874679566, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022275417298078536, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022275417298078536, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_bin_occupancy": 0.128515625, "signal/format_reward/group_std_mean": 0.0051879632286727425, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002186433505266905, "signal/frontier_aurc_reward/group_bin_occupancy": 0.79296875, "signal/frontier_aurc_reward/group_std_mean": 0.00316581423394382, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9137157000368464e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9137157000368464e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22060461044311525, "signal/frontier_coverage_1/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_1/group_std_mean": 0.2838852107524872, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_10/centered_abs_mean": 0.22060461044311525, "signal/frontier_coverage_10/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_10/group_std_mean": 0.2838852107524872, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_15/centered_abs_mean": 0.22060461044311525, "signal/frontier_coverage_15/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_15/group_std_mean": 0.2838852107524872, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_20/centered_abs_mean": 0.22060461044311525, "signal/frontier_coverage_20/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_20/group_std_mean": 0.2838852107524872, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_25/centered_abs_mean": 0.22060461044311525, "signal/frontier_coverage_25/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_25/group_std_mean": 0.2838852107524872, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_5/centered_abs_mean": 0.22060461044311525, "signal/frontier_coverage_5/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_5/group_std_mean": 0.2838852107524872, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003948822524398566, "signal/frontier_ece_reward/centered_abs_mean": 0.050569846481084826, "signal/frontier_ece_reward/group_bin_occupancy": 0.68359375, "signal/frontier_ece_reward/group_std_mean": 0.06721205115318299, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006321230810135603, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006321230810135603, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2772687911987305, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3573911190032959, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03465859889984131, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03465859889984131, "step": 75 }, { "calibration/aurc": 0.3027865730158209, "calibration/batch_distribution_entropy": 0.9824120975656007, "calibration/batch_entropy_100bins": 0.9675954439907024, "calibration/batch_entropy_10bins": 0.9824120975656007, "calibration/batch_entropy_50bins": 0.9768153214786937, "calibration/batch_uniqueness": 0.9534858712115677, "calibration/buffer_distribution_entropy": 0.9745029867503969, "calibration/buffer_entropy_100bins": 0.9254029563560598, "calibration/buffer_entropy_10bins": 0.9745029867503969, "calibration/buffer_entropy_50bins": 0.9494087491638338, "calibration/confidence_entropy": 0.47803596521726666, "calibration/coverage@0%": 0.015629586594911937, "calibration/coverage@1%": 0.015629586594911937, "calibration/coverage@10%": 0.14150486179060665, "calibration/coverage@15%": 0.28024859344422703, "calibration/coverage@20%": 0.3670330846379647, "calibration/coverage@25%": 0.4843245474559687, "calibration/coverage@30%": 0.5863885151663405, "calibration/coverage@5%": 0.03712695694716243, "calibration/ece": 0.1304594987473791, "calibration/mean_confidence": 0.5039093234803077, "calibration/prompt_uniqueness": 0.8684847225383715, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1332.2, "completions/max_terminated_length": 770.2, "completions/mean_length": 211.239453125, "completions/mean_terminated_length": 210.33331604003905, "completions/min_length": 97.4, "completions/min_terminated_length": 97.4, "epoch": 0.256, "grad_norm": 0.001191093702800572, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 269663731.0, "reward": 0.8402328372001648, "reward_std": 0.11854993999004364, "rewards/accuracy_reward": 0.50908203125, "rewards/brier_reward": 0.7678213238716125, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.002967528020963073, "rewards/frontier_coverage_1": 0.10275040753185749, "rewards/frontier_coverage_10": 0.10275040753185749, "rewards/frontier_coverage_15": 0.10275040753185749, "rewards/frontier_coverage_20": 0.10275040753185749, "rewards/frontier_coverage_25": 0.10275040753185749, "rewards/frontier_coverage_5": 0.10275040753185749, "rewards/frontier_ece_reward": 0.024521516263484956, "rewards/frontier_entropy_batch_reward": -0.19076020121574402, "signal/accuracy_reward/centered_abs_mean": 0.136175537109375, "signal/accuracy_reward/group_bin_occupancy": 0.18671875, "signal/accuracy_reward/group_std_mean": 0.17651076018810272, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0680877685546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0680877685546875, "signal/advantage_abs_mean": 0.0925136923789978, "signal/advantage_pre_scale_abs_mean": 0.0925136923789978, "signal/advantage_pre_scale_std": 0.13497910499572754, "signal/advantage_std": 0.13497910499572754, "signal/brier_reward/centered_abs_mean": 0.16773454546928407, "signal/brier_reward/group_bin_occupancy": 0.859375, "signal/brier_reward/group_std_mean": 0.21297150552272798, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02096681818366051, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02096681818366051, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_bin_occupancy": 0.12890625, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002278644498437643, "signal/frontier_aurc_reward/group_bin_occupancy": 0.781640625, "signal/frontier_aurc_reward/group_std_mean": 0.0033373693004250526, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.078773708897643e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.078773708897643e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2098309278488159, "signal/frontier_coverage_1/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_1/group_std_mean": 0.26889588236808776, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_10/centered_abs_mean": 0.2098309278488159, "signal/frontier_coverage_10/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_10/group_std_mean": 0.26889588236808776, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_15/centered_abs_mean": 0.2098309278488159, "signal/frontier_coverage_15/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_15/group_std_mean": 0.26889588236808776, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_20/centered_abs_mean": 0.2098309278488159, "signal/frontier_coverage_20/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_20/group_std_mean": 0.26889588236808776, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_25/centered_abs_mean": 0.2098309278488159, "signal/frontier_coverage_25/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_25/group_std_mean": 0.26889588236808776, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_5/centered_abs_mean": 0.2098309278488159, "signal/frontier_coverage_5/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_5/group_std_mean": 0.26889588236808776, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037559733726084234, "signal/frontier_ece_reward/centered_abs_mean": 0.04780852794647217, "signal/frontier_ece_reward/group_bin_occupancy": 0.66171875, "signal/frontier_ece_reward/group_std_mean": 0.06307629272341728, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005976065993309021, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005976065993309021, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.271970134973526, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3501406848430634, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03399626687169075, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03399626687169075, "step": 80 }, { "calibration/aurc": 0.367258270235761, "calibration/batch_distribution_entropy": 0.9895644463823899, "calibration/batch_entropy_100bins": 0.971964415223642, "calibration/batch_entropy_10bins": 0.9895644463823899, "calibration/batch_entropy_50bins": 0.9832667329307185, "calibration/batch_uniqueness": 0.9549774753110185, "calibration/buffer_distribution_entropy": 0.9773499248067615, "calibration/buffer_entropy_100bins": 0.9333172418219796, "calibration/buffer_entropy_10bins": 0.9773499248067615, "calibration/buffer_entropy_50bins": 0.9548425574225243, "calibration/confidence_entropy": 0.49009995581254717, "calibration/coverage@0%": 0.003126528864970646, "calibration/coverage@1%": 0.003126528864970646, "calibration/coverage@10%": 0.11253975048923678, "calibration/coverage@15%": 0.144580938111546, "calibration/coverage@20%": 0.20554748654598826, "calibration/coverage@25%": 0.2856638331702544, "calibration/coverage@30%": 0.3716640166340509, "calibration/coverage@5%": 0.06562652886497064, "calibration/ece": 0.14631884515051413, "calibration/mean_confidence": 0.498405006869299, "calibration/prompt_uniqueness": 0.8753438292631373, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1144.6, "completions/max_terminated_length": 571.2, "completions/mean_length": 214.196875, "completions/mean_terminated_length": 213.6815643310547, "completions/min_length": 91.2, "completions/min_terminated_length": 91.2, "epoch": 0.272, "grad_norm": 0.0008510042098350823, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 286822803.0, "reward": 0.8385721206665039, "reward_std": 0.11488556414842606, "rewards/accuracy_reward": 0.49951171875, "rewards/brier_reward": 0.7645434498786926, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0029727344401180742, "rewards/frontier_coverage_1": 0.10659078769385814, "rewards/frontier_coverage_10": 0.10659078769385814, "rewards/frontier_coverage_15": 0.10659078769385814, "rewards/frontier_coverage_20": 0.10659078769385814, "rewards/frontier_coverage_25": 0.10659078769385814, "rewards/frontier_coverage_5": 0.10659078769385814, "rewards/frontier_ece_reward": 0.021371402591466904, "rewards/frontier_entropy_batch_reward": -0.16341689825057984, "signal/accuracy_reward/centered_abs_mean": 0.124920654296875, "signal/accuracy_reward/group_bin_occupancy": 0.184375, "signal/accuracy_reward/group_std_mean": 0.16582859456539153, "signal/accuracy_reward/group_zero_std_frac": 0.525, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0624603271484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0624603271484375, "signal/advantage_abs_mean": 0.08883939385414123, "signal/advantage_pre_scale_abs_mean": 0.08883939385414123, "signal/advantage_pre_scale_std": 0.13017976582050322, "signal/advantage_std": 0.13017976582050322, "signal/brier_reward/centered_abs_mean": 0.1681692123413086, "signal/brier_reward/group_bin_occupancy": 0.858984375, "signal/brier_reward/group_std_mean": 0.2134801924228668, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021021151542663576, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021021151542663576, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_bin_occupancy": 0.128125, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002199468924663961, "signal/frontier_aurc_reward/group_bin_occupancy": 0.768359375, "signal/frontier_aurc_reward/group_std_mean": 0.0032290446572005747, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9370492595480754e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9370492595480754e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21055666208267212, "signal/frontier_coverage_1/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_1/group_std_mean": 0.2709640562534332, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_10/centered_abs_mean": 0.21055666208267212, "signal/frontier_coverage_10/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_10/group_std_mean": 0.2709640562534332, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_15/centered_abs_mean": 0.21055666208267212, "signal/frontier_coverage_15/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_15/group_std_mean": 0.2709640562534332, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_20/centered_abs_mean": 0.21055666208267212, "signal/frontier_coverage_20/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_20/group_std_mean": 0.2709640562534332, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_25/centered_abs_mean": 0.21055666208267212, "signal/frontier_coverage_25/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_25/group_std_mean": 0.2709640562534332, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_5/centered_abs_mean": 0.21055666208267212, "signal/frontier_coverage_5/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_5/group_std_mean": 0.2709640562534332, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037689640186727045, "signal/frontier_ece_reward/centered_abs_mean": 0.044014541804790495, "signal/frontier_ece_reward/group_bin_occupancy": 0.654296875, "signal/frontier_ece_reward/group_std_mean": 0.058340293169021604, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005501817725598812, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005501817725598812, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2418614625930786, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3228378236293793, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030232682824134827, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030232682824134827, "step": 85 }, { "calibration/aurc": 0.3467141361919698, "calibration/batch_distribution_entropy": 0.9916170507697315, "calibration/batch_entropy_100bins": 0.9717505719323067, "calibration/batch_entropy_10bins": 0.9916170507697315, "calibration/batch_entropy_50bins": 0.9835024232131098, "calibration/batch_uniqueness": 0.9547727550018493, "calibration/buffer_distribution_entropy": 0.9800138866676088, "calibration/buffer_entropy_100bins": 0.9401036418916597, "calibration/buffer_entropy_10bins": 0.9800138866676088, "calibration/buffer_entropy_50bins": 0.959772586517515, "calibration/confidence_entropy": 0.4993253403838624, "calibration/coverage@0%": 0.011344178082191781, "calibration/coverage@1%": 0.011344178082191781, "calibration/coverage@10%": 0.055174443493150684, "calibration/coverage@15%": 0.0911792135518591, "calibration/coverage@20%": 0.14161111790606654, "calibration/coverage@25%": 0.17874495474559687, "calibration/coverage@30%": 0.3023085861056751, "calibration/coverage@5%": 0.035218933463796474, "calibration/ece": 0.11480474764719235, "calibration/mean_confidence": 0.5161338636836956, "calibration/prompt_uniqueness": 0.8751561382186525, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1536.0, "completions/max_terminated_length": 714.0, "completions/mean_length": 205.28056640625, "completions/mean_terminated_length": 204.50087890625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.288, "grad_norm": 0.0010703956941142678, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 303883052.0, "reward": 0.8444351196289063, "reward_std": 0.11515617072582245, "rewards/accuracy_reward": 0.51435546875, "rewards/brier_reward": 0.7651584386825562, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002866340894252062, "rewards/frontier_coverage_1": 0.09787596613168717, "rewards/frontier_coverage_10": 0.09787596613168717, "rewards/frontier_coverage_15": 0.09787596613168717, "rewards/frontier_coverage_20": 0.09787596613168717, "rewards/frontier_coverage_25": 0.09787596613168717, "rewards/frontier_coverage_5": 0.09787596613168717, "rewards/frontier_ece_reward": 0.020884520187973977, "rewards/frontier_entropy_batch_reward": -0.1689342439174652, "signal/accuracy_reward/centered_abs_mean": 0.134161376953125, "signal/accuracy_reward/group_bin_occupancy": 0.186328125, "signal/accuracy_reward/group_std_mean": 0.17561693191528321, "signal/accuracy_reward/group_zero_std_frac": 0.509375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0670806884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0670806884765625, "signal/advantage_abs_mean": 0.09000947773456573, "signal/advantage_pre_scale_abs_mean": 0.09000947773456573, "signal/advantage_pre_scale_std": 0.13045729398727418, "signal/advantage_std": 0.13045729398727418, "signal/brier_reward/centered_abs_mean": 0.16578397750854493, "signal/brier_reward/group_bin_occupancy": 0.86015625, "signal/brier_reward/group_std_mean": 0.20985957980155945, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020722997188568116, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020722997188568116, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021198820788413284, "signal/frontier_aurc_reward/group_bin_occupancy": 0.778125, "signal/frontier_aurc_reward/group_std_mean": 0.003104550950229168, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.794588847085834e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.794588847085834e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2139565408229828, "signal/frontier_coverage_1/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_1/group_std_mean": 0.2742366850376129, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_10/centered_abs_mean": 0.2139565408229828, "signal/frontier_coverage_10/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_10/group_std_mean": 0.2742366850376129, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_15/centered_abs_mean": 0.2139565408229828, "signal/frontier_coverage_15/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_15/group_std_mean": 0.2742366850376129, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_20/centered_abs_mean": 0.2139565408229828, "signal/frontier_coverage_20/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_20/group_std_mean": 0.2742366850376129, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_25/centered_abs_mean": 0.2139565408229828, "signal/frontier_coverage_25/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_25/group_std_mean": 0.2742366850376129, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_5/centered_abs_mean": 0.2139565408229828, "signal/frontier_coverage_5/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_5/group_std_mean": 0.2742366850376129, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038298218045383693, "signal/frontier_ece_reward/centered_abs_mean": 0.04083863347768783, "signal/frontier_ece_reward/group_bin_occupancy": 0.6359375, "signal/frontier_ece_reward/group_std_mean": 0.053824542462825774, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005104829184710979, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005104829184710979, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25178911685943606, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3293434023857117, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03147363960742951, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03147363960742951, "step": 90 }, { "calibration/aurc": 0.3012958766581849, "calibration/batch_distribution_entropy": 0.9837421655310596, "calibration/batch_entropy_100bins": 0.9707956283872885, "calibration/batch_entropy_10bins": 0.9837421655310596, "calibration/batch_entropy_50bins": 0.9794131696452247, "calibration/batch_uniqueness": 0.9540138412528385, "calibration/buffer_distribution_entropy": 0.9819037876792042, "calibration/buffer_entropy_100bins": 0.9457814405734298, "calibration/buffer_entropy_10bins": 0.9819037876792042, "calibration/buffer_entropy_50bins": 0.9637111958737059, "calibration/confidence_entropy": 0.4894476112277997, "calibration/coverage@0%": 0.0054825367647058825, "calibration/coverage@1%": 0.0054825367647058825, "calibration/coverage@10%": 0.04701746323529411, "calibration/coverage@15%": 0.1557919730392157, "calibration/coverage@20%": 0.2575536151960784, "calibration/coverage@25%": 0.39245251225490196, "calibration/coverage@30%": 0.5761427696078432, "calibration/coverage@5%": 0.010580575980392156, "calibration/ece": 0.12106669425909085, "calibration/mean_confidence": 0.5282264856879203, "calibration/prompt_uniqueness": 0.8732020399305556, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1072.8, "completions/max_terminated_length": 670.4, "completions/mean_length": 207.81123046875, "completions/mean_terminated_length": 207.42286987304686, "completions/min_length": 88.6, "completions/min_terminated_length": 88.6, "epoch": 0.304, "grad_norm": 0.000941499718464911, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 320940991.0, "reward": 0.8401289582252502, "reward_std": 0.10886461585760117, "rewards/accuracy_reward": 0.51181640625, "rewards/brier_reward": 0.7542804956436158, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0029725372325628994, "rewards/frontier_coverage_1": 0.08972824737429619, "rewards/frontier_coverage_10": 0.08972824737429619, "rewards/frontier_coverage_15": 0.08972824737429619, "rewards/frontier_coverage_20": 0.08972824737429619, "rewards/frontier_coverage_25": 0.08972824737429619, "rewards/frontier_coverage_5": 0.08972824737429619, "rewards/frontier_ece_reward": 0.017498020455241202, "rewards/frontier_entropy_batch_reward": -0.17311883568763733, "signal/accuracy_reward/centered_abs_mean": 0.122198486328125, "signal/accuracy_reward/group_bin_occupancy": 0.183203125, "signal/accuracy_reward/group_std_mean": 0.16242018342018127, "signal/accuracy_reward/group_zero_std_frac": 0.534375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0610992431640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0610992431640625, "signal/advantage_abs_mean": 0.08474316000938416, "signal/advantage_pre_scale_abs_mean": 0.08474316000938416, "signal/advantage_pre_scale_std": 0.12225746810436249, "signal/advantage_std": 0.12225746810436249, "signal/brier_reward/centered_abs_mean": 0.1670261949300766, "signal/brier_reward/group_bin_occupancy": 0.86796875, "signal/brier_reward/group_std_mean": 0.21082913279533386, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020878274366259574, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020878274366259574, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021015216829255224, "signal/frontier_aurc_reward/group_bin_occupancy": 0.775390625, "signal/frontier_aurc_reward/group_std_mean": 0.003054375061765313, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.761723637580871e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.761723637580871e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21436746120452882, "signal/frontier_coverage_1/group_bin_occupancy": 0.88125, "signal/frontier_coverage_1/group_std_mean": 0.2753202378749847, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_10/centered_abs_mean": 0.21436746120452882, "signal/frontier_coverage_10/group_bin_occupancy": 0.88125, "signal/frontier_coverage_10/group_std_mean": 0.2753202378749847, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_15/centered_abs_mean": 0.21436746120452882, "signal/frontier_coverage_15/group_bin_occupancy": 0.88125, "signal/frontier_coverage_15/group_std_mean": 0.2753202378749847, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_20/centered_abs_mean": 0.21436746120452882, "signal/frontier_coverage_20/group_bin_occupancy": 0.88125, "signal/frontier_coverage_20/group_std_mean": 0.2753202378749847, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_25/centered_abs_mean": 0.21436746120452882, "signal/frontier_coverage_25/group_bin_occupancy": 0.88125, "signal/frontier_coverage_25/group_std_mean": 0.2753202378749847, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_5/centered_abs_mean": 0.21436746120452882, "signal/frontier_coverage_5/group_bin_occupancy": 0.88125, "signal/frontier_coverage_5/group_std_mean": 0.2753202378749847, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038371775299310684, "signal/frontier_ece_reward/centered_abs_mean": 0.03961938172578812, "signal/frontier_ece_reward/group_bin_occupancy": 0.6265625, "signal/frontier_ece_reward/group_std_mean": 0.051925134658813474, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004952422715723515, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004952422715723515, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25431135296821594, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3349409639835358, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03178891912102699, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03178891912102699, "step": 95 }, { "calibration/aurc": 0.2567548892732471, "calibration/batch_distribution_entropy": 0.9855417888920972, "calibration/batch_entropy_100bins": 0.9704664005737822, "calibration/batch_entropy_10bins": 0.9855417888920972, "calibration/batch_entropy_50bins": 0.9802541786549019, "calibration/batch_uniqueness": 0.9540855714765243, "calibration/buffer_distribution_entropy": 0.983232282815165, "calibration/buffer_entropy_100bins": 0.9505687986075818, "calibration/buffer_entropy_10bins": 0.983232282815165, "calibration/buffer_entropy_50bins": 0.9668949759737334, "calibration/confidence_entropy": 0.48253534328172953, "calibration/coverage@0%": 0.013282778864970646, "calibration/coverage@1%": 0.013282778864970646, "calibration/coverage@10%": 0.21737555039138945, "calibration/coverage@15%": 0.34399844055772993, "calibration/coverage@20%": 0.44441964285714286, "calibration/coverage@25%": 0.5225859222113503, "calibration/coverage@30%": 0.6793251590019569, "calibration/coverage@5%": 0.07500152886497065, "calibration/ece": 0.1256929928477562, "calibration/mean_confidence": 0.5410886092142749, "calibration/prompt_uniqueness": 0.8685091111960197, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1152.6, "completions/max_terminated_length": 535.2, "completions/mean_length": 207.54091796875, "completions/mean_terminated_length": 206.8915283203125, "completions/min_length": 94.8, "completions/min_terminated_length": 94.8, "epoch": 0.32, "grad_norm": 0.0010245247976854444, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 338154914.0, "reward": 0.8519093155860901, "reward_std": 0.10203693956136703, "rewards/accuracy_reward": 0.5287109375, "rewards/brier_reward": 0.7753150343894959, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0027055200189352036, "rewards/frontier_coverage_1": 0.09903865978121758, "rewards/frontier_coverage_10": 0.09903865978121758, "rewards/frontier_coverage_15": 0.09903865978121758, "rewards/frontier_coverage_20": 0.09903865978121758, "rewards/frontier_coverage_25": 0.09903865978121758, "rewards/frontier_coverage_5": 0.09903865978121758, "rewards/frontier_ece_reward": 0.022906527668237687, "rewards/frontier_entropy_batch_reward": -0.1797630488872528, "signal/accuracy_reward/centered_abs_mean": 0.09708251953125, "signal/accuracy_reward/group_bin_occupancy": 0.177734375, "signal/accuracy_reward/group_std_mean": 0.13628645241260529, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048541259765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.048541259765625, "signal/advantage_abs_mean": 0.07726810723543168, "signal/advantage_pre_scale_abs_mean": 0.07726810723543168, "signal/advantage_pre_scale_std": 0.11642331779003143, "signal/advantage_std": 0.11642331779003143, "signal/brier_reward/centered_abs_mean": 0.15678012669086455, "signal/brier_reward/group_bin_occupancy": 0.85234375, "signal/brier_reward/group_std_mean": 0.1998385190963745, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01959751583635807, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01959751583635807, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0035306816920638085, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002104542893357575, "signal/frontier_aurc_reward/group_bin_occupancy": 0.777734375, "signal/frontier_aurc_reward/group_std_mean": 0.0030782954767346383, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.76713156583719e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.76713156583719e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19006343185901642, "signal/frontier_coverage_1/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_1/group_std_mean": 0.24371300339698793, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_10/centered_abs_mean": 0.19006343185901642, "signal/frontier_coverage_10/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_10/group_std_mean": 0.24371300339698793, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_15/centered_abs_mean": 0.19006343185901642, "signal/frontier_coverage_15/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_15/group_std_mean": 0.24371300339698793, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_20/centered_abs_mean": 0.19006343185901642, "signal/frontier_coverage_20/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_20/group_std_mean": 0.24371300339698793, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_25/centered_abs_mean": 0.19006343185901642, "signal/frontier_coverage_25/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_25/group_std_mean": 0.24371300339698793, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_5/centered_abs_mean": 0.19006343185901642, "signal/frontier_coverage_5/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_5/group_std_mean": 0.24371300339698793, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00340213542804122, "signal/frontier_ece_reward/centered_abs_mean": 0.04008113071322441, "signal/frontier_ece_reward/group_bin_occupancy": 0.603515625, "signal/frontier_ece_reward/group_std_mean": 0.05185698121786118, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005010141339153051, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005010141339153051, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2621337234973907, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33962839245796206, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03276671543717384, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03276671543717384, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4795200196771078, "eval_calibration/batch_distribution_entropy": 0.9021095312089411, "eval_calibration/batch_entropy_100bins": 0.6837504483206038, "eval_calibration/batch_entropy_10bins": 0.9021095312089411, "eval_calibration/batch_entropy_50bins": 0.7600994124001819, "eval_calibration/batch_uniqueness": 0.8837890625, "eval_calibration/buffer_distribution_entropy": 0.9836716950770678, "eval_calibration/buffer_entropy_100bins": 0.9532928632962285, "eval_calibration/buffer_entropy_10bins": 0.9836716950770678, "eval_calibration/buffer_entropy_50bins": 0.968683184468552, "eval_calibration/confidence_entropy": 0.45694869462366755, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.078125, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.1015625, "eval_calibration/coverage@30%": 0.21875, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.2528412410504649, "eval_calibration/mean_confidence": 0.4613864587309076, "eval_calibration/prompt_uniqueness": 0.8837890625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 422.0, "eval_completions/max_terminated_length": 422.0, "eval_completions/mean_length": 207.25538635253906, "eval_completions/mean_terminated_length": 207.25538635253906, "eval_completions/min_length": 107.25, "eval_completions/min_terminated_length": 107.25, "eval_loss": 0.0, "eval_num_tokens": 338154914.0, "eval_reward": 0.7035073935985565, "eval_reward_std": 0.2254948876798153, "eval_rewards/accuracy_reward": 0.41796875, "eval_rewards/brier_reward": 0.7835361808538437, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003143564856145531, "eval_rewards/frontier_coverage_1": 0.18091510236263275, "eval_rewards/frontier_coverage_10": 0.18091510236263275, "eval_rewards/frontier_coverage_15": 0.18091510236263275, "eval_rewards/frontier_coverage_20": 0.18091510236263275, "eval_rewards/frontier_coverage_25": 0.18091510236263275, "eval_rewards/frontier_coverage_5": 0.18091510236263275, "eval_rewards/frontier_ece_reward": 0.017655907664448023, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 21.7452, "eval_samples_per_second": 22.994, "eval_signal/accuracy_reward/centered_abs_mean": 0.4716796875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49268144369125366, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23583984375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23583984375, "eval_signal/advantage_abs_mean": 0.2067052647471428, "eval_signal/advantage_pre_scale_abs_mean": 0.2067052647471428, "eval_signal/advantage_pre_scale_std": 0.22346897423267365, "eval_signal/advantage_std": 0.22346897423267365, "eval_signal/brier_reward/centered_abs_mean": 0.19929831847548485, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.24902449920773506, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024912289809435606, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.024912289809435606, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030411332263611257, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7734375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004569516517221928, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.443628197099315e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.443628197099315e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.35146621614694595, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.43268968909978867, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.35146621614694595, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.43268968909978867, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.35146621614694595, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.43268968909978867, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.35146621614694595, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_20/group_std_mean": 0.43268968909978867, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.35146621614694595, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_25/group_std_mean": 0.43268968909978867, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.35146621614694595, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.43268968909978867, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006291245226748288, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.04135385248810053, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7421875, "eval_signal/frontier_ece_reward/group_std_mean": 0.062202571891248226, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005169231561012566, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005169231561012566, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.184, "step": 100 }, { "calibration/aurc": 0.30765736387397496, "calibration/batch_distribution_entropy": 0.9848037323472848, "calibration/batch_entropy_100bins": 0.9693385191204799, "calibration/batch_entropy_10bins": 0.9848037323472848, "calibration/batch_entropy_50bins": 0.9781402406586969, "calibration/batch_uniqueness": 0.9536346435546875, "calibration/buffer_distribution_entropy": 0.9854686951779537, "calibration/buffer_entropy_100bins": 0.9569212673153616, "calibration/buffer_entropy_10bins": 0.9854686951779537, "calibration/buffer_entropy_50bins": 0.9713440007079225, "calibration/confidence_entropy": 0.4995682667114387, "calibration/coverage@0%": 0.019140625, "calibration/coverage@1%": 0.019140625, "calibration/coverage@10%": 0.040625, "calibration/coverage@15%": 0.075, "calibration/coverage@20%": 0.15234375, "calibration/coverage@25%": 0.30859375, "calibration/coverage@30%": 0.529296875, "calibration/coverage@5%": 0.019921875, "calibration/ece": 0.12052893017349497, "calibration/mean_confidence": 0.48753457746559115, "calibration/prompt_uniqueness": 0.867578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 982.4, "completions/max_terminated_length": 562.6, "completions/mean_length": 209.47412109375, "completions/mean_terminated_length": 209.2148651123047, "completions/min_length": 91.6, "completions/min_terminated_length": 91.6, "epoch": 0.336, "grad_norm": 0.0010945890098810196, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 355022361.0, "reward": 0.8489757418632508, "reward_std": 0.10516680479049682, "rewards/accuracy_reward": 0.52822265625, "rewards/brier_reward": 0.7679716944694519, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0026710231322795153, "rewards/frontier_coverage_1": 0.08716111201792956, "rewards/frontier_coverage_10": 0.08716111201792956, "rewards/frontier_coverage_15": 0.08716111201792956, "rewards/frontier_coverage_20": 0.08716111201792956, "rewards/frontier_coverage_25": 0.08716111201792956, "rewards/frontier_coverage_5": 0.08716111201792956, "rewards/frontier_ece_reward": 0.01877461187541485, "rewards/frontier_entropy_batch_reward": -0.18116532862186432, "signal/accuracy_reward/centered_abs_mean": 0.112652587890625, "signal/accuracy_reward/group_bin_occupancy": 0.18125, "signal/accuracy_reward/group_std_mean": 0.15273889005184174, "signal/accuracy_reward/group_zero_std_frac": 0.55, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0563262939453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0563262939453125, "signal/advantage_abs_mean": 0.08183809071779251, "signal/advantage_pre_scale_abs_mean": 0.08183809071779251, "signal/advantage_pre_scale_std": 0.12064377218484879, "signal/advantage_std": 0.12064377218484879, "signal/brier_reward/centered_abs_mean": 0.15700196623802185, "signal/brier_reward/group_bin_occupancy": 0.870703125, "signal/brier_reward/group_std_mean": 0.19847926795482634, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01962524577975273, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01962524577975273, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002023302251473069, "signal/frontier_aurc_reward/group_bin_occupancy": 0.76171875, "signal/frontier_aurc_reward/group_std_mean": 0.0030010143760591745, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.621711148298346e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.621711148298346e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19928583800792693, "signal/frontier_coverage_1/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_1/group_std_mean": 0.25434514582157136, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_10/centered_abs_mean": 0.19928583800792693, "signal/frontier_coverage_10/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_10/group_std_mean": 0.25434514582157136, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_15/centered_abs_mean": 0.19928583800792693, "signal/frontier_coverage_15/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_15/group_std_mean": 0.25434514582157136, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_20/centered_abs_mean": 0.19928583800792693, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.25434514582157136, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_25/centered_abs_mean": 0.19928583800792693, "signal/frontier_coverage_25/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_25/group_std_mean": 0.25434514582157136, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_5/centered_abs_mean": 0.19928583800792693, "signal/frontier_coverage_5/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_5/group_std_mean": 0.25434514582157136, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035672161728143694, "signal/frontier_ece_reward/centered_abs_mean": 0.03437858745455742, "signal/frontier_ece_reward/group_bin_occupancy": 0.616015625, "signal/frontier_ece_reward/group_std_mean": 0.04594796299934387, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004297323431819678, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004297323431819678, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26069518327713015, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3357880413532257, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03258689790964127, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03258689790964127, "step": 105 }, { "calibration/aurc": 0.3248327389426379, "calibration/batch_distribution_entropy": 0.9694191613572741, "calibration/batch_entropy_100bins": 0.9653426900496431, "calibration/batch_entropy_10bins": 0.9694191613572741, "calibration/batch_entropy_50bins": 0.9723249920791073, "calibration/batch_uniqueness": 0.9502899169921875, "calibration/buffer_distribution_entropy": 0.9915301414778608, "calibration/buffer_entropy_100bins": 0.9701647014936114, "calibration/buffer_entropy_10bins": 0.9915301414778608, "calibration/buffer_entropy_50bins": 0.9807902497768815, "calibration/confidence_entropy": 0.46782397627976635, "calibration/coverage@0%": 0.01640625, "calibration/coverage@1%": 0.01640625, "calibration/coverage@10%": 0.158984375, "calibration/coverage@15%": 0.28203125, "calibration/coverage@20%": 0.358984375, "calibration/coverage@25%": 0.437109375, "calibration/coverage@30%": 0.49609375, "calibration/coverage@5%": 0.019140625, "calibration/ece": 0.12978758350246267, "calibration/mean_confidence": 0.43861533967578464, "calibration/prompt_uniqueness": 0.862841796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1178.4, "completions/max_terminated_length": 645.2, "completions/mean_length": 213.62646484375, "completions/mean_terminated_length": 213.23961181640624, "completions/min_length": 89.6, "completions/min_terminated_length": 89.6, "epoch": 0.352, "grad_norm": 0.0010587095748633146, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 372470312.0, "reward": 0.8264712929725647, "reward_std": 0.10371551960706711, "rewards/accuracy_reward": 0.47275390625, "rewards/brier_reward": 0.7754327297210694, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002917947107926011, "rewards/frontier_coverage_1": 0.13510125279426574, "rewards/frontier_coverage_10": 0.13510125279426574, "rewards/frontier_coverage_15": 0.13510125279426574, "rewards/frontier_coverage_20": 0.13510125279426574, "rewards/frontier_coverage_25": 0.13510125279426574, "rewards/frontier_coverage_5": 0.13510125279426574, "rewards/frontier_ece_reward": 0.015004617348313331, "rewards/frontier_entropy_batch_reward": -0.18378140330314635, "signal/accuracy_reward/centered_abs_mean": 0.112518310546875, "signal/accuracy_reward/group_bin_occupancy": 0.178515625, "signal/accuracy_reward/group_std_mean": 0.14888902008533478, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0562591552734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0562591552734375, "signal/advantage_abs_mean": 0.08059937804937363, "signal/advantage_pre_scale_abs_mean": 0.08059937804937363, "signal/advantage_pre_scale_std": 0.11854567229747773, "signal/advantage_std": 0.11854567229747773, "signal/brier_reward/centered_abs_mean": 0.15186150074005128, "signal/brier_reward/group_bin_occupancy": 0.857421875, "signal/brier_reward/group_std_mean": 0.19348691403865814, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01898268759250641, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01898268759250641, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002148870355449617, "signal/frontier_aurc_reward/group_bin_occupancy": 0.76796875, "signal/frontier_aurc_reward/group_std_mean": 0.0032697335351258515, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.846477629849687e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.846477629849687e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20343527495861052, "signal/frontier_coverage_1/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_1/group_std_mean": 0.25838565826416016, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_10/centered_abs_mean": 0.20343527495861052, "signal/frontier_coverage_10/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_10/group_std_mean": 0.25838565826416016, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_15/centered_abs_mean": 0.20343527495861052, "signal/frontier_coverage_15/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_15/group_std_mean": 0.25838565826416016, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_20/centered_abs_mean": 0.20343527495861052, "signal/frontier_coverage_20/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_20/group_std_mean": 0.25838565826416016, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_25/centered_abs_mean": 0.20343527495861052, "signal/frontier_coverage_25/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_25/group_std_mean": 0.25838565826416016, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_5/centered_abs_mean": 0.20343527495861052, "signal/frontier_coverage_5/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_5/group_std_mean": 0.25838565826416016, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036414912901818752, "signal/frontier_ece_reward/centered_abs_mean": 0.028419509530067444, "signal/frontier_ece_reward/group_bin_occupancy": 0.628515625, "signal/frontier_ece_reward/group_std_mean": 0.03753211200237274, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035524386912584305, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035524386912584305, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.256140798330307, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75078125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3320682287216187, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032017599791288376, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032017599791288376, "step": 110 }, { "calibration/aurc": 0.3685978316456159, "calibration/batch_distribution_entropy": 0.9841220587243489, "calibration/batch_entropy_100bins": 0.9711259650561594, "calibration/batch_entropy_10bins": 0.9841220587243489, "calibration/batch_entropy_50bins": 0.9806752929552862, "calibration/batch_uniqueness": 0.9539794921875, "calibration/buffer_distribution_entropy": 0.9960558216143583, "calibration/buffer_entropy_100bins": 0.982010151103973, "calibration/buffer_entropy_10bins": 0.9960558216143583, "calibration/buffer_entropy_50bins": 0.9888810910867181, "calibration/confidence_entropy": 0.47978596346982316, "calibration/coverage@0%": 0.01015625, "calibration/coverage@1%": 0.01015625, "calibration/coverage@10%": 0.01875, "calibration/coverage@15%": 0.069921875, "calibration/coverage@20%": 0.158203125, "calibration/coverage@25%": 0.3125, "calibration/coverage@30%": 0.39921875, "calibration/coverage@5%": 0.01015625, "calibration/ece": 0.11309242941504012, "calibration/mean_confidence": 0.5008828550670226, "calibration/prompt_uniqueness": 0.862109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1536.0, "completions/max_terminated_length": 633.2, "completions/mean_length": 213.10068359375, "completions/mean_terminated_length": 212.4544189453125, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.368, "grad_norm": 0.0011026038555428386, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 389717935.0, "reward": 0.8334917783737182, "reward_std": 0.10164395570755005, "rewards/accuracy_reward": 0.4896484375, "rewards/brier_reward": 0.7806977868080139, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0031225197948515416, "rewards/frontier_coverage_1": 0.1296861067414284, "rewards/frontier_coverage_10": 0.1296861067414284, "rewards/frontier_coverage_15": 0.1296861067414284, "rewards/frontier_coverage_20": 0.1296861067414284, "rewards/frontier_coverage_25": 0.1296861067414284, "rewards/frontier_coverage_5": 0.1296861067414284, "rewards/frontier_ece_reward": 0.01422354057431221, "rewards/frontier_entropy_batch_reward": -0.19460689425468444, "signal/accuracy_reward/centered_abs_mean": 0.1107421875, "signal/accuracy_reward/group_bin_occupancy": 0.175390625, "signal/accuracy_reward/group_std_mean": 0.14366783648729325, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05537109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05537109375, "signal/advantage_abs_mean": 0.07986960113048554, "signal/advantage_pre_scale_abs_mean": 0.07986960113048554, "signal/advantage_pre_scale_std": 0.118281988799572, "signal/advantage_std": 0.118281988799572, "signal/brier_reward/centered_abs_mean": 0.14576351642608643, "signal/brier_reward/group_bin_occupancy": 0.859375, "signal/brier_reward/group_std_mean": 0.18720718026161193, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018220439553260803, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018220439553260803, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002540640765801072, "signal/frontier_aurc_reward/group_bin_occupancy": 0.760546875, "signal/frontier_aurc_reward/group_std_mean": 0.0037694845348596575, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.547746866592206e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.547746866592206e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1892180174589157, "signal/frontier_coverage_1/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_1/group_std_mean": 0.2430718183517456, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_10/centered_abs_mean": 0.1892180174589157, "signal/frontier_coverage_10/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_10/group_std_mean": 0.2430718183517456, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_15/centered_abs_mean": 0.1892180174589157, "signal/frontier_coverage_15/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_15/group_std_mean": 0.2430718183517456, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_20/centered_abs_mean": 0.1892180174589157, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.2430718183517456, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_25/centered_abs_mean": 0.1892180174589157, "signal/frontier_coverage_25/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_25/group_std_mean": 0.2430718183517456, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_5/centered_abs_mean": 0.1892180174589157, "signal/frontier_coverage_5/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_5/group_std_mean": 0.2430718183517456, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003387002367526293, "signal/frontier_ece_reward/centered_abs_mean": 0.0239554800093174, "signal/frontier_ece_reward/group_bin_occupancy": 0.63671875, "signal/frontier_ece_reward/group_std_mean": 0.031465400382876395, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002994435001164675, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002994435001164675, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26627758145332336, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33881971836090086, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03328469768166542, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03328469768166542, "step": 115 }, { "calibration/aurc": 0.3288035707028756, "calibration/batch_distribution_entropy": 0.972391171131245, "calibration/batch_entropy_100bins": 0.9643182066779674, "calibration/batch_entropy_10bins": 0.972391171131245, "calibration/batch_entropy_50bins": 0.9728774568300317, "calibration/batch_uniqueness": 0.9514887123619662, "calibration/buffer_distribution_entropy": 0.998363598881251, "calibration/buffer_entropy_100bins": 0.9907127922569062, "calibration/buffer_entropy_10bins": 0.998363598881251, "calibration/buffer_entropy_50bins": 0.9943556436848473, "calibration/confidence_entropy": 0.467932900808412, "calibration/coverage@0%": 0.026954653864970646, "calibration/coverage@1%": 0.026954653864970646, "calibration/coverage@10%": 0.17734527886497065, "calibration/coverage@15%": 0.24492340386497063, "calibration/coverage@20%": 0.29843902886497065, "calibration/coverage@25%": 0.3417984038649706, "calibration/coverage@30%": 0.3917984038649706, "calibration/coverage@5%": 0.12304840386497065, "calibration/ece": 0.13064203539088265, "calibration/mean_confidence": 0.45519123283717455, "calibration/prompt_uniqueness": 0.8569748512291884, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1157.2, "completions/max_terminated_length": 602.4, "completions/mean_length": 209.76728515625, "completions/mean_terminated_length": 208.9923522949219, "completions/min_length": 95.2, "completions/min_terminated_length": 95.2, "epoch": 0.384, "grad_norm": 0.0010048962431028485, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 406722464.0, "reward": 0.8486302971839905, "reward_std": 0.10181694477796555, "rewards/accuracy_reward": 0.52177734375, "rewards/brier_reward": 0.7872754693031311, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0028854547068476676, "rewards/frontier_coverage_1": 0.11500916741788388, "rewards/frontier_coverage_10": 0.11500916741788388, "rewards/frontier_coverage_15": 0.11500916741788388, "rewards/frontier_coverage_20": 0.11500916741788388, "rewards/frontier_coverage_25": 0.11500916741788388, "rewards/frontier_coverage_5": 0.11500916741788388, "rewards/frontier_ece_reward": 0.013547521643340587, "rewards/frontier_entropy_batch_reward": -0.19455830454826356, "signal/accuracy_reward/centered_abs_mean": 0.109857177734375, "signal/accuracy_reward/group_bin_occupancy": 0.18359375, "signal/accuracy_reward/group_std_mean": 0.15272603631019593, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0549285888671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0549285888671875, "signal/advantage_abs_mean": 0.07775198072195053, "signal/advantage_pre_scale_abs_mean": 0.07775198072195053, "signal/advantage_pre_scale_std": 0.11655086725950241, "signal/advantage_std": 0.11655086725950241, "signal/brier_reward/centered_abs_mean": 0.14183862507343292, "signal/brier_reward/group_bin_occupancy": 0.846875, "signal/brier_reward/group_std_mean": 0.1818255215883255, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017729828134179115, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017729828134179115, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0035306816920638085, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002797884680330753, "signal/frontier_aurc_reward/group_bin_occupancy": 0.762109375, "signal/frontier_aurc_reward/group_std_mean": 0.00430455575697124, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.008213483961299e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.008213483961299e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18613593876361847, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.23912187218666076, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_10/centered_abs_mean": 0.18613593876361847, "signal/frontier_coverage_10/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_10/group_std_mean": 0.23912187218666076, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_15/centered_abs_mean": 0.18613593876361847, "signal/frontier_coverage_15/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_15/group_std_mean": 0.23912187218666076, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_20/centered_abs_mean": 0.18613593876361847, "signal/frontier_coverage_20/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_20/group_std_mean": 0.23912187218666076, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_25/centered_abs_mean": 0.18613593876361847, "signal/frontier_coverage_25/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_25/group_std_mean": 0.23912187218666076, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_5/centered_abs_mean": 0.18613593876361847, "signal/frontier_coverage_5/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_5/group_std_mean": 0.23912187218666076, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003331833053380251, "signal/frontier_ece_reward/centered_abs_mean": 0.02027609832584858, "signal/frontier_ece_reward/group_bin_occupancy": 0.671484375, "signal/frontier_ece_reward/group_std_mean": 0.026138320937752722, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025345122907310725, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025345122907310725, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25942595601081847, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33479920625686643, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03242824450135231, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03242824450135231, "step": 120 }, { "calibration/aurc": 0.44117263123313394, "calibration/batch_distribution_entropy": 0.9888037543083327, "calibration/batch_entropy_100bins": 0.9728081260995414, "calibration/batch_entropy_10bins": 0.9888037543083327, "calibration/batch_entropy_50bins": 0.9824201332031917, "calibration/batch_uniqueness": 0.95474853515625, "calibration/buffer_distribution_entropy": 0.9988600772928141, "calibration/buffer_entropy_100bins": 0.9957624928547298, "calibration/buffer_entropy_10bins": 0.9988600772928141, "calibration/buffer_entropy_50bins": 0.9972569067670085, "calibration/confidence_entropy": 0.495948138479487, "calibration/coverage@0%": 0.003125, "calibration/coverage@1%": 0.003125, "calibration/coverage@10%": 0.005859375, "calibration/coverage@15%": 0.005859375, "calibration/coverage@20%": 0.026953125, "calibration/coverage@25%": 0.07890625, "calibration/coverage@30%": 0.1234375, "calibration/coverage@5%": 0.003125, "calibration/ece": 0.16232330180334845, "calibration/mean_confidence": 0.49218618777158946, "calibration/prompt_uniqueness": 0.8701171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 940.8, "completions/max_terminated_length": 539.8, "completions/mean_length": 209.08955078125, "completions/mean_terminated_length": 208.8299774169922, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.4, "grad_norm": 0.0011378073832020164, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 423899989.0, "reward": 0.8323954820632935, "reward_std": 0.11341892182826996, "rewards/accuracy_reward": 0.49619140625, "rewards/brier_reward": 0.7690539121627807, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003590290108695626, "rewards/frontier_coverage_1": 0.1151683684438467, "rewards/frontier_coverage_10": 0.1151683684438467, "rewards/frontier_coverage_15": 0.1151683684438467, "rewards/frontier_coverage_20": 0.1151683684438467, "rewards/frontier_coverage_25": 0.1151683684438467, "rewards/frontier_coverage_5": 0.1151683684438467, "rewards/frontier_ece_reward": 0.009640791360288859, "rewards/frontier_entropy_batch_reward": -0.20078192055225372, "signal/accuracy_reward/centered_abs_mean": 0.135638427734375, "signal/accuracy_reward/group_bin_occupancy": 0.18828125, "signal/accuracy_reward/group_std_mean": 0.1780630737543106, "signal/accuracy_reward/group_zero_std_frac": 0.49375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0678192138671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0678192138671875, "signal/advantage_abs_mean": 0.0883943647146225, "signal/advantage_pre_scale_abs_mean": 0.0883943647146225, "signal/advantage_pre_scale_std": 0.13039152324199677, "signal/advantage_std": 0.13039152324199677, "signal/brier_reward/centered_abs_mean": 0.1547802209854126, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.19734705090522767, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019347527623176576, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019347527623176576, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003553676325827837, "signal/frontier_aurc_reward/group_bin_occupancy": 0.737109375, "signal/frontier_aurc_reward/group_std_mean": 0.00554114431142807, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.361080304486677e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.361080304486677e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19457192122936248, "signal/frontier_coverage_1/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_1/group_std_mean": 0.25038447678089143, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_10/centered_abs_mean": 0.19457192122936248, "signal/frontier_coverage_10/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_10/group_std_mean": 0.25038447678089143, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_15/centered_abs_mean": 0.19457192122936248, "signal/frontier_coverage_15/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_15/group_std_mean": 0.25038447678089143, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_20/centered_abs_mean": 0.19457192122936248, "signal/frontier_coverage_20/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_20/group_std_mean": 0.25038447678089143, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_25/centered_abs_mean": 0.19457192122936248, "signal/frontier_coverage_25/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_25/group_std_mean": 0.25038447678089143, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_5/centered_abs_mean": 0.19457192122936248, "signal/frontier_coverage_5/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_5/group_std_mean": 0.25038447678089143, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003482837276533246, "signal/frontier_ece_reward/centered_abs_mean": 0.01716732941567898, "signal/frontier_ece_reward/group_bin_occupancy": 0.703515625, "signal/frontier_ece_reward/group_std_mean": 0.02221398986876011, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021459161769598724, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021459161769598724, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2748726367950439, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3535886824131012, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03435907959938049, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03435907959938049, "step": 125 }, { "calibration/aurc": 0.3402747101814266, "calibration/batch_distribution_entropy": 0.982515497589976, "calibration/batch_entropy_100bins": 0.9682872609351033, "calibration/batch_entropy_10bins": 0.982515497589976, "calibration/batch_entropy_50bins": 0.9768947206180159, "calibration/batch_uniqueness": 0.9531341552734375, "calibration/buffer_distribution_entropy": 0.9987716981394495, "calibration/buffer_entropy_100bins": 0.9977724602459563, "calibration/buffer_entropy_10bins": 0.9987716981394495, "calibration/buffer_entropy_50bins": 0.9983235617956613, "calibration/confidence_entropy": 0.5100973588451903, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.0328125, "calibration/coverage@15%": 0.0578125, "calibration/coverage@20%": 0.109375, "calibration/coverage@25%": 0.216015625, "calibration/coverage@30%": 0.336328125, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.11401476367502783, "calibration/mean_confidence": 0.49950840361909454, "calibration/prompt_uniqueness": 0.86982421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1027.2, "completions/max_terminated_length": 647.4, "completions/mean_length": 209.46474609375, "completions/mean_terminated_length": 209.2056671142578, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.416, "grad_norm": 0.000968018255662173, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 440926092.0, "reward": 0.8401611685752869, "reward_std": 0.10533516258001327, "rewards/accuracy_reward": 0.50615234375, "rewards/brier_reward": 0.7812270641326904, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0030852626543492077, "rewards/frontier_coverage_1": 0.11723168343305587, "rewards/frontier_coverage_10": 0.11723168343305587, "rewards/frontier_coverage_15": 0.11723168343305587, "rewards/frontier_coverage_20": 0.11723168343305587, "rewards/frontier_coverage_25": 0.11723168343305587, "rewards/frontier_coverage_5": 0.11723168343305587, "rewards/frontier_ece_reward": 0.008592206053435802, "rewards/frontier_entropy_batch_reward": -0.19225098192691803, "signal/accuracy_reward/centered_abs_mean": 0.126690673828125, "signal/accuracy_reward/group_bin_occupancy": 0.180078125, "signal/accuracy_reward/group_std_mean": 0.16168214678764342, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633453369140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0633453369140625, "signal/advantage_abs_mean": 0.08324380666017532, "signal/advantage_pre_scale_abs_mean": 0.08324380666017532, "signal/advantage_pre_scale_std": 0.12175452709197998, "signal/advantage_std": 0.12175452709197998, "signal/brier_reward/centered_abs_mean": 0.14609736502170562, "signal/brier_reward/group_bin_occupancy": 0.859375, "signal/brier_reward/group_std_mean": 0.1860405534505844, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018262170627713202, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018262170627713202, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029900921043008568, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74140625, "signal/frontier_aurc_reward/group_std_mean": 0.004770136158913374, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.352264633984305e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.352264633984305e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20230944752693175, "signal/frontier_coverage_1/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_1/group_std_mean": 0.25601867139339446, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_10/centered_abs_mean": 0.20230944752693175, "signal/frontier_coverage_10/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_10/group_std_mean": 0.25601867139339446, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_15/centered_abs_mean": 0.20230944752693175, "signal/frontier_coverage_15/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_15/group_std_mean": 0.25601867139339446, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_20/centered_abs_mean": 0.20230944752693175, "signal/frontier_coverage_20/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_20/group_std_mean": 0.25601867139339446, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_25/centered_abs_mean": 0.20230944752693175, "signal/frontier_coverage_25/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_25/group_std_mean": 0.25601867139339446, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_5/centered_abs_mean": 0.20230944752693175, "signal/frontier_coverage_5/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_5/group_std_mean": 0.25601867139339446, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036213390529155733, "signal/frontier_ece_reward/centered_abs_mean": 0.01373654417693615, "signal/frontier_ece_reward/group_bin_occupancy": 0.695703125, "signal/frontier_ece_reward/group_std_mean": 0.017755693942308425, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017170680221170187, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017170680221170187, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2682903289794922, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34430991411209105, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03353629112243652, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03353629112243652, "step": 130 }, { "calibration/aurc": 0.2828474082475755, "calibration/batch_distribution_entropy": 0.9857383661471948, "calibration/batch_entropy_100bins": 0.9731037393328108, "calibration/batch_entropy_10bins": 0.9857383661471948, "calibration/batch_entropy_50bins": 0.9798947749352924, "calibration/batch_uniqueness": 0.9544403076171875, "calibration/buffer_distribution_entropy": 0.998966911938475, "calibration/buffer_entropy_100bins": 0.9983970981633485, "calibration/buffer_entropy_10bins": 0.998966911938475, "calibration/buffer_entropy_50bins": 0.9987812332687316, "calibration/confidence_entropy": 0.477685401183353, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.132421875, "calibration/coverage@15%": 0.248046875, "calibration/coverage@20%": 0.333203125, "calibration/coverage@25%": 0.408984375, "calibration/coverage@30%": 0.49609375, "calibration/coverage@5%": 0.0484375, "calibration/ece": 0.10728726491884, "calibration/mean_confidence": 0.5202790137028369, "calibration/prompt_uniqueness": 0.853662109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 204.96689453125, "completions/mean_terminated_length": 204.96689453125, "completions/min_length": 97.2, "completions/min_terminated_length": 97.2, "epoch": 0.432, "grad_norm": 0.001178718637675047, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 458039289.0, "reward": 0.8579501390457154, "reward_std": 0.09831726402044297, "rewards/accuracy_reward": 0.5365234375, "rewards/brier_reward": 0.7933999061584472, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002635437436401844, "rewards/frontier_coverage_1": 0.1119713842868805, "rewards/frontier_coverage_10": 0.1119713842868805, "rewards/frontier_coverage_15": 0.1119713842868805, "rewards/frontier_coverage_20": 0.1119713842868805, "rewards/frontier_coverage_25": 0.11118775270879269, "rewards/frontier_coverage_5": 0.1119713842868805, "rewards/frontier_ece_reward": 0.00932666277512908, "rewards/frontier_entropy_batch_reward": -0.18015409708023072, "signal/accuracy_reward/centered_abs_mean": 0.1129638671875, "signal/accuracy_reward/group_bin_occupancy": 0.178515625, "signal/accuracy_reward/group_std_mean": 0.1487947881221771, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05648193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05648193359375, "signal/advantage_abs_mean": 0.07683975026011466, "signal/advantage_pre_scale_abs_mean": 0.07683975026011466, "signal/advantage_pre_scale_std": 0.11419809311628341, "signal/advantage_std": 0.11419809311628341, "signal/brier_reward/centered_abs_mean": 0.13628783226013183, "signal/brier_reward/group_bin_occupancy": 0.845703125, "signal/brier_reward/group_std_mean": 0.1745894193649292, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01703597903251648, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01703597903251648, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002808052161708474, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375, "signal/frontier_aurc_reward/group_std_mean": 0.004525643587112427, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0264131277799604e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0264131277799604e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18720480799674988, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.23972273170948027, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_10/centered_abs_mean": 0.18720480799674988, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.23972273170948027, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_15/centered_abs_mean": 0.18720480799674988, "signal/frontier_coverage_15/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_15/group_std_mean": 0.23972273170948027, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_20/centered_abs_mean": 0.18720480799674988, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.23972273170948027, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_25/centered_abs_mean": 0.182588392496109, "signal/frontier_coverage_25/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_25/group_std_mean": 0.2337813049554825, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032683320809155703, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032683320809155703, "signal/frontier_coverage_5/centered_abs_mean": 0.18720480799674988, "signal/frontier_coverage_5/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_5/group_std_mean": 0.23972273170948027, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003350965864956379, "signal/frontier_ece_reward/centered_abs_mean": 0.012440289743244648, "signal/frontier_ece_reward/group_bin_occupancy": 0.68828125, "signal/frontier_ece_reward/group_std_mean": 0.015995739214122295, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001555036217905581, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001555036217905581, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2538039118051529, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32851467132568357, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03172548897564411, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03172548897564411, "step": 135 }, { "calibration/aurc": 0.30295920649554897, "calibration/batch_distribution_entropy": 0.9762808168631931, "calibration/batch_entropy_100bins": 0.9670601797644209, "calibration/batch_entropy_10bins": 0.9762808168631931, "calibration/batch_entropy_50bins": 0.9758043195231133, "calibration/batch_uniqueness": 0.9525875723374158, "calibration/buffer_distribution_entropy": 0.9992497597924999, "calibration/buffer_entropy_100bins": 0.9987190075433615, "calibration/buffer_entropy_10bins": 0.9992497597924999, "calibration/buffer_entropy_50bins": 0.9990799529370424, "calibration/confidence_entropy": 0.5108151699420393, "calibration/coverage@0%": 0.02461243272994129, "calibration/coverage@1%": 0.02461243272994129, "calibration/coverage@10%": 0.08008118272994129, "calibration/coverage@15%": 0.11719285102739727, "calibration/coverage@20%": 0.19414597602739728, "calibration/coverage@25%": 0.2558815435420744, "calibration/coverage@30%": 0.3833361362524462, "calibration/coverage@5%": 0.05195618272994129, "calibration/ece": 0.11912945503053644, "calibration/mean_confidence": 0.5507951330679218, "calibration/prompt_uniqueness": 0.8687448174102498, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1153.2, "completions/max_terminated_length": 614.4, "completions/mean_length": 210.47998046875, "completions/mean_terminated_length": 210.09168395996093, "completions/min_length": 100.6, "completions/min_terminated_length": 100.6, "epoch": 0.448, "grad_norm": 0.0009407022153027356, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 475147404.0, "reward": 0.842948317527771, "reward_std": 0.10061811208724976, "rewards/accuracy_reward": 0.50947265625, "rewards/brier_reward": 0.7875774383544922, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003255841927602887, "rewards/frontier_coverage_1": 0.11736078262329101, "rewards/frontier_coverage_10": 0.11736078262329101, "rewards/frontier_coverage_15": 0.11736078262329101, "rewards/frontier_coverage_20": 0.11736078262329101, "rewards/frontier_coverage_25": 0.11350735127925873, "rewards/frontier_coverage_5": 0.11736078262329101, "rewards/frontier_ece_reward": 0.007119755912572146, "rewards/frontier_entropy_batch_reward": -0.187257120013237, "signal/accuracy_reward/centered_abs_mean": 0.111285400390625, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.1417740285396576, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0556427001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0556427001953125, "signal/advantage_abs_mean": 0.07906914353370667, "signal/advantage_pre_scale_abs_mean": 0.07906914353370667, "signal/advantage_pre_scale_std": 0.1164934977889061, "signal/advantage_std": 0.1164934977889061, "signal/brier_reward/centered_abs_mean": 0.13891042470932008, "signal/brier_reward/group_bin_occupancy": 0.863671875, "signal/brier_reward/group_std_mean": 0.17801333963871002, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01736380308866501, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01736380308866501, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003165739495307207, "signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875, "signal/frontier_aurc_reward/group_std_mean": 0.005288008600473404, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6666734599275516e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6666734599275516e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18679011166095733, "signal/frontier_coverage_1/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_1/group_std_mean": 0.23579410016536712, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_10/centered_abs_mean": 0.18679011166095733, "signal/frontier_coverage_10/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_10/group_std_mean": 0.23579410016536712, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_15/centered_abs_mean": 0.18679011166095733, "signal/frontier_coverage_15/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_15/group_std_mean": 0.23579410016536712, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_20/centered_abs_mean": 0.18679011166095733, "signal/frontier_coverage_20/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_20/group_std_mean": 0.23579410016536712, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_25/centered_abs_mean": 0.17116012871265412, "signal/frontier_coverage_25/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_25/group_std_mean": 0.21680428385734557, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030637661926448344, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030637661926448344, "signal/frontier_coverage_5/centered_abs_mean": 0.18679011166095733, "signal/frontier_coverage_5/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_5/group_std_mean": 0.23579410016536712, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003343542804941535, "signal/frontier_ece_reward/centered_abs_mean": 0.011517700739204884, "signal/frontier_ece_reward/group_bin_occupancy": 0.701171875, "signal/frontier_ece_reward/group_std_mean": 0.014944654144346713, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014397125924006104, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014397125924006104, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26602200865745546, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34339261054992676, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03325275108218193, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03325275108218193, "step": 140 }, { "calibration/aurc": 0.4176907351056336, "calibration/batch_distribution_entropy": 0.9858941408112836, "calibration/batch_entropy_100bins": 0.9737808315282928, "calibration/batch_entropy_10bins": 0.9858941408112836, "calibration/batch_entropy_50bins": 0.9818068448300299, "calibration/batch_uniqueness": 0.9548906392026806, "calibration/buffer_distribution_entropy": 0.9989634378896353, "calibration/buffer_entropy_100bins": 0.9986770693112248, "calibration/buffer_entropy_10bins": 0.9989634378896353, "calibration/buffer_entropy_50bins": 0.9989321635624518, "calibration/confidence_entropy": 0.5133265562346525, "calibration/coverage@0%": 0.003910836594911937, "calibration/coverage@1%": 0.003910836594911937, "calibration/coverage@10%": 0.014478351272015655, "calibration/coverage@15%": 0.02074058219178082, "calibration/coverage@20%": 0.06727540973581213, "calibration/coverage@25%": 0.1275272137964775, "calibration/coverage@30%": 0.19516725782778865, "calibration/coverage@5%": 0.003910836594911937, "calibration/ece": 0.13511346945023558, "calibration/mean_confidence": 0.48324365347240567, "calibration/prompt_uniqueness": 0.8706947413989333, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1045.6, "completions/max_terminated_length": 775.0, "completions/mean_length": 212.744140625, "completions/mean_terminated_length": 212.48613586425782, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.464, "grad_norm": 0.0010246856836602092, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 492496720.0, "reward": 0.8157252669334412, "reward_std": 0.09751812219619752, "rewards/accuracy_reward": 0.45556640625, "rewards/brier_reward": 0.7689092993736267, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003909493004903198, "rewards/frontier_coverage_1": 0.13721695691347122, "rewards/frontier_coverage_10": 0.13721695691347122, "rewards/frontier_coverage_15": 0.13721695691347122, "rewards/frontier_coverage_20": 0.13721695691347122, "rewards/frontier_coverage_25": 0.126965943723917, "rewards/frontier_coverage_5": 0.13721695691347122, "rewards/frontier_ece_reward": 0.005434584524482489, "rewards/frontier_entropy_batch_reward": -0.18550443947315215, "signal/accuracy_reward/centered_abs_mean": 0.098260498046875, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.13415665179491043, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0491302490234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0491302490234375, "signal/advantage_abs_mean": 0.07577944248914718, "signal/advantage_pre_scale_abs_mean": 0.07577944248914718, "signal/advantage_pre_scale_std": 0.11422829777002334, "signal/advantage_std": 0.11422829777002334, "signal/brier_reward/centered_abs_mean": 0.14183038473129272, "signal/brier_reward/group_bin_occupancy": 0.86953125, "signal/brier_reward/group_std_mean": 0.17996921241283417, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01772879809141159, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01772879809141159, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003397146938368678, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71171875, "signal/frontier_aurc_reward/group_std_mean": 0.005560421571135521, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.080892926547676e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.080892926547676e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17900091111660005, "signal/frontier_coverage_1/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_1/group_std_mean": 0.23006309568881989, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_10/centered_abs_mean": 0.17900091111660005, "signal/frontier_coverage_10/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_10/group_std_mean": 0.23006309568881989, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_15/centered_abs_mean": 0.17900091111660005, "signal/frontier_coverage_15/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_15/group_std_mean": 0.23006309568881989, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_20/centered_abs_mean": 0.17900091111660005, "signal/frontier_coverage_20/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_20/group_std_mean": 0.23006309568881989, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_25/centered_abs_mean": 0.16153070628643035, "signal/frontier_coverage_25/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_25/group_std_mean": 0.2079919010400772, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002891399711370468, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002891399711370468, "signal/frontier_coverage_5/centered_abs_mean": 0.17900091111660005, "signal/frontier_coverage_5/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_5/group_std_mean": 0.23006309568881989, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032041160855442287, "signal/frontier_ece_reward/centered_abs_mean": 0.010474135167896748, "signal/frontier_ece_reward/group_bin_occupancy": 0.6859375, "signal/frontier_ece_reward/group_std_mean": 0.013670408725738525, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013092668959870934, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013092668959870934, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2555492341518402, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3300695061683655, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03194365426898003, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03194365426898003, "step": 145 }, { "calibration/aurc": 0.28135508395814696, "calibration/batch_distribution_entropy": 0.9787142260851314, "calibration/batch_entropy_100bins": 0.9686388153723096, "calibration/batch_entropy_10bins": 0.9787142260851314, "calibration/batch_entropy_50bins": 0.9765269277569176, "calibration/batch_uniqueness": 0.9529508764845552, "calibration/buffer_distribution_entropy": 0.9985915112971814, "calibration/buffer_entropy_100bins": 0.9985480404161521, "calibration/buffer_entropy_10bins": 0.9985915112971814, "calibration/buffer_entropy_50bins": 0.9987196165320225, "calibration/confidence_entropy": 0.4845285132161723, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.049001225490196074, "calibration/coverage@15%": 0.09084405637254903, "calibration/coverage@20%": 0.3448452818627451, "calibration/coverage@25%": 0.47707720588235303, "calibration/coverage@30%": 0.587483149509804, "calibration/coverage@5%": 0.01371170343137255, "calibration/ece": 0.11707550508132455, "calibration/mean_confidence": 0.52203246818975, "calibration/prompt_uniqueness": 0.8567325810787374, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1324.2, "completions/max_terminated_length": 653.6, "completions/mean_length": 213.4330078125, "completions/mean_terminated_length": 212.65774536132812, "completions/min_length": 95.2, "completions/min_terminated_length": 95.2, "epoch": 0.48, "grad_norm": 0.0011695589637383819, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 509730306.0, "reward": 0.8438830256462098, "reward_std": 0.1029052346944809, "rewards/accuracy_reward": 0.51396484375, "rewards/brier_reward": 0.7825690031051635, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.0032245948910713195, "rewards/frontier_coverage_1": 0.11764285415410995, "rewards/frontier_coverage_10": 0.11764285415410995, "rewards/frontier_coverage_15": 0.11764285415410995, "rewards/frontier_coverage_20": 0.11763967126607895, "rewards/frontier_coverage_25": 0.10978160202503204, "rewards/frontier_coverage_5": 0.11764285415410995, "rewards/frontier_ece_reward": 0.007182773388922215, "rewards/frontier_entropy_batch_reward": -0.1905221700668335, "signal/accuracy_reward/centered_abs_mean": 0.122174072265625, "signal/accuracy_reward/group_bin_occupancy": 0.180859375, "signal/accuracy_reward/group_std_mean": 0.15882102251052857, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0610870361328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0610870361328125, "signal/advantage_abs_mean": 0.07921299338340759, "signal/advantage_pre_scale_abs_mean": 0.07921299338340759, "signal/advantage_pre_scale_std": 0.1196265995502472, "signal/advantage_std": 0.1196265995502472, "signal/brier_reward/centered_abs_mean": 0.14188800156116485, "signal/brier_reward/group_bin_occupancy": 0.84609375, "signal/brier_reward/group_std_mean": 0.18255722522735596, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017736000195145606, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017736000195145606, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_bin_occupancy": 0.128125, "signal/format_reward/group_std_mean": 0.004635536018759013, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032735579181462525, "signal/frontier_aurc_reward/group_bin_occupancy": 0.716015625, "signal/frontier_aurc_reward/group_std_mean": 0.0054481208324432375, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.859668599441648e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.859668599441648e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19356184601783752, "signal/frontier_coverage_1/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_1/group_std_mean": 0.24662669599056244, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_10/centered_abs_mean": 0.19356184601783752, "signal/frontier_coverage_10/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_10/group_std_mean": 0.24662669599056244, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_15/centered_abs_mean": 0.19356184601783752, "signal/frontier_coverage_15/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_15/group_std_mean": 0.24662669599056244, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_20/centered_abs_mean": 0.19355521202087403, "signal/frontier_coverage_20/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_20/group_std_mean": 0.24661834239959718, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003464638348668814, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003464638348668814, "signal/frontier_coverage_25/centered_abs_mean": 0.17109472453594207, "signal/frontier_coverage_25/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_25/group_std_mean": 0.21914859712123871, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030625955201685428, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030625955201685428, "signal/frontier_coverage_5/centered_abs_mean": 0.19356184601783752, "signal/frontier_coverage_5/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_5/group_std_mean": 0.24662669599056244, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034647570922970773, "signal/frontier_ece_reward/centered_abs_mean": 0.010768765956163407, "signal/frontier_ece_reward/group_bin_occupancy": 0.670703125, "signal/frontier_ece_reward/group_std_mean": 0.013872439600527286, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013460957445204258, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013460957445204258, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26164351403713226, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.750390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33615357279777525, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03270543925464153, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03270543925464153, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.501575210515319, "eval_calibration/batch_distribution_entropy": 0.9228523420660562, "eval_calibration/batch_entropy_100bins": 0.7070031781570842, "eval_calibration/batch_entropy_10bins": 0.9228523420660562, "eval_calibration/batch_entropy_50bins": 0.7799834900668859, "eval_calibration/batch_uniqueness": 0.896484375, "eval_calibration/buffer_distribution_entropy": 0.998397887423804, "eval_calibration/buffer_entropy_100bins": 0.9984792581609957, "eval_calibration/buffer_entropy_10bins": 0.998397887423804, "eval_calibration/buffer_entropy_50bins": 0.9986163202908288, "eval_calibration/confidence_entropy": 0.4588275516098055, "eval_calibration/coverage@0%": 0.0703125, "eval_calibration/coverage@1%": 0.0703125, "eval_calibration/coverage@10%": 0.0703125, "eval_calibration/coverage@15%": 0.1328125, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.1484375, "eval_calibration/coverage@30%": 0.15625, "eval_calibration/coverage@5%": 0.0703125, "eval_calibration/ece": 0.24121247296918133, "eval_calibration/mean_confidence": 0.47050686804175734, "eval_calibration/prompt_uniqueness": 0.896484375, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 663.5, "eval_completions/max_terminated_length": 373.25, "eval_completions/mean_length": 217.01414489746094, "eval_completions/mean_terminated_length": 214.43864822387695, "eval_completions/min_length": 119.75, "eval_completions/min_terminated_length": 119.75, "eval_loss": 0.0, "eval_num_tokens": 509730306.0, "eval_reward": 0.6953702718019485, "eval_reward_std": 0.22602055966854095, "eval_rewards/accuracy_reward": 0.40234375, "eval_rewards/brier_reward": 0.7895104587078094, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.004140242876019329, "eval_rewards/frontier_coverage_1": 0.19547064229846, "eval_rewards/frontier_coverage_10": 0.19547064229846, "eval_rewards/frontier_coverage_15": 0.1954706460237503, "eval_rewards/frontier_coverage_20": 0.19546055048704147, "eval_rewards/frontier_coverage_25": 0.16091356799006462, "eval_rewards/frontier_coverage_5": 0.19547064229846, "eval_rewards/frontier_ece_reward": 0.007530742208473384, "eval_rewards/frontier_entropy_batch_reward": -0.998046875, "eval_runtime": 29.3853, "eval_samples_per_second": 17.015, "eval_signal/accuracy_reward/centered_abs_mean": 0.4638671875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.48863568156957626, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23193359375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23193359375, "eval_signal/advantage_abs_mean": 0.20584385097026825, "eval_signal/advantage_pre_scale_abs_mean": 0.20584385097026825, "eval_signal/advantage_pre_scale_std": 0.2239415980875492, "eval_signal/advantage_std": 0.2239415980875492, "eval_signal/brier_reward/centered_abs_mean": 0.1952781230211258, "eval_signal/brier_reward/group_bin_occupancy": 0.921875, "eval_signal/brier_reward/group_std_mean": 0.24598337337374687, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024409765377640724, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.024409765377640724, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_bin_occupancy": 0.1328125, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0055227604461833835, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.703125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.01063884247560054, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.88574065559078e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.88574065559078e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.33650386333465576, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.4147630110383034, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.33650386333465576, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.4147630110383034, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.33650386333465576, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.4147630110383034, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.33647605031728745, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_20/group_std_mean": 0.4147294908761978, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006022921064868569, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006022921064868569, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2806103155016899, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.34814976155757904, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005022924277000129, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005022924277000129, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.33650386333465576, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.4147630110383034, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0060234187403693795, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013130403822287917, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.828125, "eval_signal/frontier_ece_reward/group_std_mean": 0.017393003683537245, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016413004777859896, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016413004777859896, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1328125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0004730224609375, "eval_steps_per_second": 0.136, "step": 150 }, { "calibration/aurc": 0.3772412508603652, "calibration/batch_distribution_entropy": 0.9858011213469208, "calibration/batch_entropy_100bins": 0.9743770131797682, "calibration/batch_entropy_10bins": 0.9858011213469208, "calibration/batch_entropy_50bins": 0.981834889265462, "calibration/batch_uniqueness": 0.95457763671875, "calibration/buffer_distribution_entropy": 0.9983367910852584, "calibration/buffer_entropy_100bins": 0.9984834597266543, "calibration/buffer_entropy_10bins": 0.9983367910852584, "calibration/buffer_entropy_50bins": 0.9985992857477267, "calibration/confidence_entropy": 0.49097018484416743, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.096484375, "calibration/coverage@15%": 0.157421875, "calibration/coverage@20%": 0.1921875, "calibration/coverage@25%": 0.24140625, "calibration/coverage@30%": 0.323046875, "calibration/coverage@5%": 0.04375, "calibration/ece": 0.1415806258155156, "calibration/mean_confidence": 0.5163618468152427, "calibration/prompt_uniqueness": 0.857763671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 549.8, "completions/max_terminated_length": 549.8, "completions/mean_length": 213.34248046875, "completions/mean_terminated_length": 213.34248046875, "completions/min_length": 96.4, "completions/min_terminated_length": 96.4, "epoch": 0.496, "grad_norm": 0.0009145310032181442, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 527222773.0, "reward": 0.8564480781555176, "reward_std": 0.09648310244083405, "rewards/accuracy_reward": 0.53896484375, "rewards/brier_reward": 0.7830032706260681, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003137602610513568, "rewards/frontier_coverage_1": 0.0957273930311203, "rewards/frontier_coverage_10": 0.0957273930311203, "rewards/frontier_coverage_15": 0.09572737365961075, "rewards/frontier_coverage_20": 0.09494999200105667, "rewards/frontier_coverage_25": 0.073183061927557, "rewards/frontier_coverage_5": 0.0957273930311203, "rewards/frontier_ece_reward": 0.006351951695978642, "rewards/frontier_entropy_batch_reward": -0.17130873203277588, "signal/accuracy_reward/centered_abs_mean": 0.099688720703125, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.13495108485221863, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0498443603515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0498443603515625, "signal/advantage_abs_mean": 0.07473702281713486, "signal/advantage_pre_scale_abs_mean": 0.07473702281713486, "signal/advantage_pre_scale_std": 0.1126061201095581, "signal/advantage_std": 0.1126061201095581, "signal/brier_reward/centered_abs_mean": 0.13330003023147582, "signal/brier_reward/group_bin_occupancy": 0.850390625, "signal/brier_reward/group_std_mean": 0.17171771228313445, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016662503778934478, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016662503778934478, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003219319973140955, "signal/frontier_aurc_reward/group_bin_occupancy": 0.708203125, "signal/frontier_aurc_reward/group_std_mean": 0.0053937271237373356, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.762582732131705e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.762582732131705e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17066286504268646, "signal/frontier_coverage_1/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_1/group_std_mean": 0.2204454004764557, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003054865123704076, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003054865123704076, "signal/frontier_coverage_10/centered_abs_mean": 0.17066286504268646, "signal/frontier_coverage_10/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_10/group_std_mean": 0.2204454004764557, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003054865123704076, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003054865123704076, "signal/frontier_coverage_15/centered_abs_mean": 0.17066278159618378, "signal/frontier_coverage_15/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_15/group_std_mean": 0.22044530212879182, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003054863726720214, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003054863726720214, "signal/frontier_coverage_20/centered_abs_mean": 0.16988441944122315, "signal/frontier_coverage_20/group_bin_occupancy": 0.8734375, "signal/frontier_coverage_20/group_std_mean": 0.21948779225349427, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003040931047871709, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003040931047871709, "signal/frontier_coverage_25/centered_abs_mean": 0.13763956129550933, "signal/frontier_coverage_25/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_25/group_std_mean": 0.17838802933692932, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024637479800730944, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024637479800730944, "signal/frontier_coverage_5/centered_abs_mean": 0.17066286504268646, "signal/frontier_coverage_5/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_5/group_std_mean": 0.2204454004764557, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003054865123704076, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003054865123704076, "signal/frontier_ece_reward/centered_abs_mean": 0.009669752418994903, "signal/frontier_ece_reward/group_bin_occupancy": 0.671484375, "signal/frontier_ece_reward/group_std_mean": 0.012576807662844658, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001208719052374363, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001208719052374363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24946886897087098, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32889034748077395, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031183608621358872, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031183608621358872, "step": 155 }, { "calibration/aurc": 0.3124343045271322, "calibration/batch_distribution_entropy": 0.9821167478401351, "calibration/batch_entropy_100bins": 0.9707625540350067, "calibration/batch_entropy_10bins": 0.9821167478401351, "calibration/batch_entropy_50bins": 0.9800767408019752, "calibration/batch_uniqueness": 0.953835017942229, "calibration/buffer_distribution_entropy": 0.9983246583833173, "calibration/buffer_entropy_100bins": 0.9985049021749475, "calibration/buffer_entropy_10bins": 0.9983246583833173, "calibration/buffer_entropy_50bins": 0.998608856762916, "calibration/confidence_entropy": 0.512842485457416, "calibration/coverage@0%": 0.02736744740704501, "calibration/coverage@1%": 0.02736744740704501, "calibration/coverage@10%": 0.22644936399217222, "calibration/coverage@15%": 0.33432378302348337, "calibration/coverage@20%": 0.3933394997553816, "calibration/coverage@25%": 0.45234375, "calibration/coverage@30%": 0.491796875, "calibration/coverage@5%": 0.12906525195694715, "calibration/ece": 0.14509790879365198, "calibration/mean_confidence": 0.49541147618364095, "calibration/prompt_uniqueness": 0.8591433077523414, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 916.0, "completions/max_terminated_length": 579.6, "completions/mean_length": 209.73173828125, "completions/mean_terminated_length": 209.47265625, "completions/min_length": 95.4, "completions/min_terminated_length": 95.4, "epoch": 0.512, "grad_norm": 0.0008832181338220835, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 544516090.0, "reward": 0.8589070677757263, "reward_std": 0.09946933686733246, "rewards/accuracy_reward": 0.5400390625, "rewards/brier_reward": 0.7991329669952393, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0025545242242515087, "rewards/frontier_coverage_1": 0.11153013110160828, "rewards/frontier_coverage_10": 0.11153013110160828, "rewards/frontier_coverage_15": 0.11152207553386688, "rewards/frontier_coverage_20": 0.11059406325221062, "rewards/frontier_coverage_25": 0.08714946508407592, "rewards/frontier_coverage_5": 0.11153013110160828, "rewards/frontier_ece_reward": 0.006665406748652458, "rewards/frontier_entropy_batch_reward": -0.1849699854850769, "signal/accuracy_reward/centered_abs_mean": 0.10582275390625, "signal/accuracy_reward/group_bin_occupancy": 0.178125, "signal/accuracy_reward/group_std_mean": 0.1435195803642273, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052911376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052911376953125, "signal/advantage_abs_mean": 0.0774817332625389, "signal/advantage_pre_scale_abs_mean": 0.0774817332625389, "signal/advantage_pre_scale_std": 0.11714037954807281, "signal/advantage_std": 0.11714037954807281, "signal/brier_reward/centered_abs_mean": 0.1255343437194824, "signal/brier_reward/group_bin_occupancy": 0.844140625, "signal/brier_reward/group_std_mean": 0.16391099691390992, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0156917929649353, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0156917929649353, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027026119641959667, "signal/frontier_aurc_reward/group_bin_occupancy": 0.712109375, "signal/frontier_aurc_reward/group_std_mean": 0.004470287868753075, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8376752238254996e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8376752238254996e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16258499324321746, "signal/frontier_coverage_1/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_1/group_std_mean": 0.21444275677204133, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002910271333530545, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002910271333530545, "signal/frontier_coverage_10/centered_abs_mean": 0.16258499324321746, "signal/frontier_coverage_10/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_10/group_std_mean": 0.21444275677204133, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002910271333530545, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002910271333530545, "signal/frontier_coverage_15/centered_abs_mean": 0.16256897747516633, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.2144217312335968, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029099844861775635, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029099844861775635, "signal/frontier_coverage_20/centered_abs_mean": 0.16080774068832399, "signal/frontier_coverage_20/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_20/group_std_mean": 0.21214835047721864, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00287845847196877, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00287845847196877, "signal/frontier_coverage_25/centered_abs_mean": 0.11882531195878983, "signal/frontier_coverage_25/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_25/group_std_mean": 0.157957324385643, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021269729593768718, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021269729593768718, "signal/frontier_coverage_5/centered_abs_mean": 0.16258499324321746, "signal/frontier_coverage_5/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_5/group_std_mean": 0.21444275677204133, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002910271333530545, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002910271333530545, "signal/frontier_ece_reward/centered_abs_mean": 0.008545207604765893, "signal/frontier_ece_reward/group_bin_occupancy": 0.658203125, "signal/frontier_ece_reward/group_std_mean": 0.011142410896718502, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010681509505957366, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010681509505957366, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2622275412082672, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.720703125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34102784395217894, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0327784426510334, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0327784426510334, "step": 160 }, { "calibration/aurc": 0.22197753458783573, "calibration/batch_distribution_entropy": 0.9872636635687544, "calibration/batch_entropy_100bins": 0.9717745831016632, "calibration/batch_entropy_10bins": 0.9872636635687544, "calibration/batch_entropy_50bins": 0.9817316279365228, "calibration/batch_uniqueness": 0.9543426513671875, "calibration/buffer_distribution_entropy": 0.998607875057014, "calibration/buffer_entropy_100bins": 0.9986924219957551, "calibration/buffer_entropy_10bins": 0.998607875057014, "calibration/buffer_entropy_50bins": 0.9987976735559277, "calibration/confidence_entropy": 0.484241145901464, "calibration/coverage@0%": 0.02734375, "calibration/coverage@1%": 0.02734375, "calibration/coverage@10%": 0.273046875, "calibration/coverage@15%": 0.334375, "calibration/coverage@20%": 0.46015625, "calibration/coverage@25%": 0.627734375, "calibration/coverage@30%": 0.740234375, "calibration/coverage@5%": 0.116796875, "calibration/ece": 0.11655803249046533, "calibration/mean_confidence": 0.5053498338953919, "calibration/prompt_uniqueness": 0.849365234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1177.8, "completions/max_terminated_length": 759.8, "completions/mean_length": 211.98857421875, "completions/mean_terminated_length": 211.72989807128906, "completions/min_length": 104.4, "completions/min_terminated_length": 104.4, "epoch": 0.528, "grad_norm": 0.0009716249769553542, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 561716389.0, "reward": 0.8563125967979431, "reward_std": 0.09635183066129685, "rewards/accuracy_reward": 0.5341796875, "rewards/brier_reward": 0.8004718899726868, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0024792027892544866, "rewards/frontier_coverage_1": 0.12539061158895493, "rewards/frontier_coverage_10": 0.12539061158895493, "rewards/frontier_coverage_15": 0.12535874992609025, "rewards/frontier_coverage_20": 0.12457017600536346, "rewards/frontier_coverage_25": 0.0941608265042305, "rewards/frontier_coverage_5": 0.12539061158895493, "rewards/frontier_ece_reward": 0.0064132180996239185, "rewards/frontier_entropy_batch_reward": -0.19510821104049683, "signal/accuracy_reward/centered_abs_mean": 0.11046142578125, "signal/accuracy_reward/group_bin_occupancy": 0.176953125, "signal/accuracy_reward/group_std_mean": 0.1455621302127838, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.055230712890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.055230712890625, "signal/advantage_abs_mean": 0.07561022490262985, "signal/advantage_pre_scale_abs_mean": 0.07561022490262985, "signal/advantage_pre_scale_std": 0.11381375342607498, "signal/advantage_std": 0.11381375342607498, "signal/brier_reward/centered_abs_mean": 0.12420621514320374, "signal/brier_reward/group_bin_occupancy": 0.838671875, "signal/brier_reward/group_std_mean": 0.16060249507427216, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015525776892900467, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015525776892900467, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002632760489359498, "signal/frontier_aurc_reward/group_bin_occupancy": 0.712109375, "signal/frontier_aurc_reward/group_std_mean": 0.004449516860768199, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.712641093647107e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.712641093647107e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.178127783536911, "signal/frontier_coverage_1/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_1/group_std_mean": 0.2289435774087906, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031884873285889627, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031884873285889627, "signal/frontier_coverage_10/centered_abs_mean": 0.178127783536911, "signal/frontier_coverage_10/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_10/group_std_mean": 0.2289435774087906, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031884873285889627, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031884873285889627, "signal/frontier_coverage_15/centered_abs_mean": 0.17806898057460785, "signal/frontier_coverage_15/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_15/group_std_mean": 0.2288702607154846, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003187434747815132, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003187434747815132, "signal/frontier_coverage_20/centered_abs_mean": 0.17593927085399627, "signal/frontier_coverage_20/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_20/group_std_mean": 0.22620816826820372, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003149312874302268, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003149312874302268, "signal/frontier_coverage_25/centered_abs_mean": 0.1212777316570282, "signal/frontier_coverage_25/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_25/group_std_mean": 0.15683144927024842, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021708713844418525, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021708713844418525, "signal/frontier_coverage_5/centered_abs_mean": 0.178127783536911, "signal/frontier_coverage_5/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_5/group_std_mean": 0.2289435774087906, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031884873285889627, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031884873285889627, "signal/frontier_ece_reward/centered_abs_mean": 0.007738732825964689, "signal/frontier_ece_reward/group_bin_occupancy": 0.648046875, "signal/frontier_ece_reward/group_std_mean": 0.009942644834518432, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009673416032455861, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009673416032455861, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2633429080247879, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3415177345275879, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032917863503098485, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032917863503098485, "step": 165 }, { "calibration/aurc": 0.2246998145590191, "calibration/batch_distribution_entropy": 0.9828267726341278, "calibration/batch_entropy_100bins": 0.9699359138709003, "calibration/batch_entropy_10bins": 0.9828267726341278, "calibration/batch_entropy_50bins": 0.9776825192493742, "calibration/batch_uniqueness": 0.9534942626953125, "calibration/buffer_distribution_entropy": 0.9986446644020803, "calibration/buffer_entropy_100bins": 0.9987341361318449, "calibration/buffer_entropy_10bins": 0.9986446644020803, "calibration/buffer_entropy_50bins": 0.9988464842427364, "calibration/confidence_entropy": 0.4779419414178685, "calibration/coverage@0%": 0.05390625, "calibration/coverage@1%": 0.058984375, "calibration/coverage@10%": 0.214453125, "calibration/coverage@15%": 0.3390625, "calibration/coverage@20%": 0.50390625, "calibration/coverage@25%": 0.621875, "calibration/coverage@30%": 0.729296875, "calibration/coverage@5%": 0.1109375, "calibration/ece": 0.09684584717108229, "calibration/mean_confidence": 0.5264456085979624, "calibration/prompt_uniqueness": 0.85, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 761.4, "completions/max_terminated_length": 552.0, "completions/mean_length": 216.10869140625, "completions/mean_terminated_length": 215.97977294921876, "completions/min_length": 102.8, "completions/min_terminated_length": 102.8, "epoch": 0.544, "grad_norm": 0.0008750662091188133, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 579092926.0, "reward": 0.865496826171875, "reward_std": 0.10031740814447403, "rewards/accuracy_reward": 0.56474609375, "rewards/brier_reward": 0.7900220870971679, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002628612471744418, "rewards/frontier_coverage_1": 0.08344129025936127, "rewards/frontier_coverage_10": 0.08344129025936127, "rewards/frontier_coverage_15": 0.08342344760894775, "rewards/frontier_coverage_20": 0.08116559684276581, "rewards/frontier_coverage_25": 0.059722674638032915, "rewards/frontier_coverage_5": 0.08344129025936127, "rewards/frontier_ece_reward": 0.005261074285954237, "rewards/frontier_entropy_batch_reward": -0.19749387800693513, "signal/accuracy_reward/centered_abs_mean": 0.117864990234375, "signal/accuracy_reward/group_bin_occupancy": 0.1828125, "signal/accuracy_reward/group_std_mean": 0.15820194482803346, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0589324951171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0589324951171875, "signal/advantage_abs_mean": 0.07773261219263077, "signal/advantage_pre_scale_abs_mean": 0.07773261219263077, "signal/advantage_pre_scale_std": 0.11587968170642853, "signal/advantage_std": 0.11587968170642853, "signal/brier_reward/centered_abs_mean": 0.1324952781200409, "signal/brier_reward/group_bin_occupancy": 0.84609375, "signal/brier_reward/group_std_mean": 0.17098439037799834, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01656190976500511, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01656190976500511, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027288103476166723, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6984375, "signal/frontier_aurc_reward/group_std_mean": 0.004612684063613415, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.884570444119163e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.884570444119163e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17990356385707856, "signal/frontier_coverage_1/group_bin_occupancy": 0.859765625, "signal/frontier_coverage_1/group_std_mean": 0.23137963116168975, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032202736940234898, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032202736940234898, "signal/frontier_coverage_10/centered_abs_mean": 0.17990356385707856, "signal/frontier_coverage_10/group_bin_occupancy": 0.859765625, "signal/frontier_coverage_10/group_std_mean": 0.23137963116168975, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032202736940234898, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032202736940234898, "signal/frontier_coverage_15/centered_abs_mean": 0.1798170268535614, "signal/frontier_coverage_15/group_bin_occupancy": 0.859375, "signal/frontier_coverage_15/group_std_mean": 0.2312684863805771, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032187245786190035, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032187245786190035, "signal/frontier_coverage_20/centered_abs_mean": 0.17393405735492706, "signal/frontier_coverage_20/group_bin_occupancy": 0.856640625, "signal/frontier_coverage_20/group_std_mean": 0.22385527491569518, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031134195160120726, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031134195160120726, "signal/frontier_coverage_25/centered_abs_mean": 0.11192511320114136, "signal/frontier_coverage_25/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_25/group_std_mean": 0.14527169466018677, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020034594694152475, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020034594694152475, "signal/frontier_coverage_5/centered_abs_mean": 0.17990356385707856, "signal/frontier_coverage_5/group_bin_occupancy": 0.859765625, "signal/frontier_coverage_5/group_std_mean": 0.23137963116168975, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032202736940234898, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032202736940234898, "signal/frontier_ece_reward/centered_abs_mean": 0.007925903517752885, "signal/frontier_ece_reward/group_bin_occupancy": 0.65703125, "signal/frontier_ece_reward/group_std_mean": 0.010128208808600903, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009907379397191107, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009907379397191107, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2713680982589722, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741796875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34780768752098085, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03392101228237152, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03392101228237152, "step": 170 }, { "calibration/aurc": 0.25887046651992324, "calibration/batch_distribution_entropy": 0.9862801195014452, "calibration/batch_entropy_100bins": 0.9684416178563928, "calibration/batch_entropy_10bins": 0.9862801195014452, "calibration/batch_entropy_50bins": 0.9790974340217475, "calibration/batch_uniqueness": 0.953765869140625, "calibration/buffer_distribution_entropy": 0.99843546825646, "calibration/buffer_entropy_100bins": 0.998662413000743, "calibration/buffer_entropy_10bins": 0.99843546825646, "calibration/buffer_entropy_50bins": 0.9987507257151149, "calibration/confidence_entropy": 0.48703887912051763, "calibration/coverage@0%": 0.038671875, "calibration/coverage@1%": 0.103125, "calibration/coverage@10%": 0.23203125, "calibration/coverage@15%": 0.319921875, "calibration/coverage@20%": 0.375, "calibration/coverage@25%": 0.4625, "calibration/coverage@30%": 0.621875, "calibration/coverage@5%": 0.170703125, "calibration/ece": 0.11540190639652079, "calibration/mean_confidence": 0.4965868130535844, "calibration/prompt_uniqueness": 0.851806640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 526.8, "completions/max_terminated_length": 526.8, "completions/mean_length": 215.82470703125, "completions/mean_terminated_length": 215.82470703125, "completions/min_length": 106.2, "completions/min_terminated_length": 106.2, "epoch": 0.56, "grad_norm": 0.0008583422750234604, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 596124379.0, "reward": 0.8527642726898194, "reward_std": 0.09059911817312241, "rewards/accuracy_reward": 0.526953125, "rewards/brier_reward": 0.8009752631187439, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002781519223935902, "rewards/frontier_coverage_1": 0.12176511883735656, "rewards/frontier_coverage_10": 0.12176511883735656, "rewards/frontier_coverage_15": 0.12170367538928986, "rewards/frontier_coverage_20": 0.11667201519012452, "rewards/frontier_coverage_25": 0.07989428639411926, "rewards/frontier_coverage_5": 0.12176511883735656, "rewards/frontier_ece_reward": 0.00570831261575222, "rewards/frontier_entropy_batch_reward": -0.1898701012134552, "signal/accuracy_reward/centered_abs_mean": 0.09024658203125, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.1275490090250969, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045123291015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045123291015625, "signal/advantage_abs_mean": 0.06931805834174157, "signal/advantage_pre_scale_abs_mean": 0.06931805834174157, "signal/advantage_pre_scale_std": 0.10619149655103684, "signal/advantage_std": 0.10619149655103684, "signal/brier_reward/centered_abs_mean": 0.12430946081876755, "signal/brier_reward/group_bin_occupancy": 0.854296875, "signal/brier_reward/group_std_mean": 0.16047678291797637, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015538682602345944, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015538682602345944, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028530734591186045, "signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125, "signal/frontier_aurc_reward/group_std_mean": 0.004656852129846812, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.107001634314656e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.107001634314656e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16476930677890778, "signal/frontier_coverage_1/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_1/group_std_mean": 0.2144735872745514, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029493705835193394, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029493705835193394, "signal/frontier_coverage_10/centered_abs_mean": 0.16476930677890778, "signal/frontier_coverage_10/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_10/group_std_mean": 0.2144735872745514, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029493705835193394, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029493705835193394, "signal/frontier_coverage_15/centered_abs_mean": 0.16467354595661163, "signal/frontier_coverage_15/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_15/group_std_mean": 0.21435152888298034, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002947656437754631, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002947656437754631, "signal/frontier_coverage_20/centered_abs_mean": 0.15480645895004272, "signal/frontier_coverage_20/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_20/group_std_mean": 0.20163175463676453, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027710356283932925, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027710356283932925, "signal/frontier_coverage_25/centered_abs_mean": 0.09889017939567565, "signal/frontier_coverage_25/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_25/group_std_mean": 0.1293856218457222, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017701340839266777, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017701340839266777, "signal/frontier_coverage_5/centered_abs_mean": 0.16476930677890778, "signal/frontier_coverage_5/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_5/group_std_mean": 0.2144735872745514, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029493705835193394, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029493705835193394, "signal/frontier_ece_reward/centered_abs_mean": 0.007086984347552061, "signal/frontier_ece_reward/group_bin_occupancy": 0.631640625, "signal/frontier_ece_reward/group_std_mean": 0.009137248806655406, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008858730434440076, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008858730434440076, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25609534978866577, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3285098135471344, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03201191872358322, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03201191872358322, "step": 175 }, { "calibration/aurc": 0.31203197751817957, "calibration/batch_distribution_entropy": 0.9843787104200963, "calibration/batch_entropy_100bins": 0.9705130031573705, "calibration/batch_entropy_10bins": 0.9843787104200963, "calibration/batch_entropy_50bins": 0.9781790741680929, "calibration/batch_uniqueness": 0.953887939453125, "calibration/buffer_distribution_entropy": 0.9986194552567443, "calibration/buffer_entropy_100bins": 0.9987917994612259, "calibration/buffer_entropy_10bins": 0.9986194552567443, "calibration/buffer_entropy_50bins": 0.998877645743185, "calibration/confidence_entropy": 0.49376287006709063, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.125390625, "calibration/coverage@15%": 0.211328125, "calibration/coverage@20%": 0.283203125, "calibration/coverage@25%": 0.368359375, "calibration/coverage@30%": 0.49375, "calibration/coverage@5%": 0.066015625, "calibration/ece": 0.08653007786443709, "calibration/mean_confidence": 0.49493694107697583, "calibration/prompt_uniqueness": 0.851953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 759.8, "completions/max_terminated_length": 546.6, "completions/mean_length": 215.15576171875, "completions/mean_terminated_length": 215.0267578125, "completions/min_length": 105.2, "completions/min_terminated_length": 105.2, "epoch": 0.576, "grad_norm": 0.000952628324739635, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 613514198.0, "reward": 0.8436745047569275, "reward_std": 0.09130887687206268, "rewards/accuracy_reward": 0.51513671875, "rewards/brier_reward": 0.7896932244300843, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003295312123373151, "rewards/frontier_coverage_1": 0.11995794028043746, "rewards/frontier_coverage_10": 0.11995794028043746, "rewards/frontier_coverage_15": 0.11995095014572144, "rewards/frontier_coverage_20": 0.11125928610563278, "rewards/frontier_coverage_25": 0.07660634070634842, "rewards/frontier_coverage_5": 0.11995794028043746, "rewards/frontier_ece_reward": 0.004759292863309383, "rewards/frontier_entropy_batch_reward": -0.19996359348297119, "signal/accuracy_reward/centered_abs_mean": 0.090301513671875, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.1269981548190117, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451507568359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451507568359375, "signal/advantage_abs_mean": 0.06951765716075897, "signal/advantage_pre_scale_abs_mean": 0.06951765716075897, "signal/advantage_pre_scale_std": 0.10639693737030029, "signal/advantage_std": 0.10639693737030029, "signal/brier_reward/centered_abs_mean": 0.12400663793087005, "signal/brier_reward/group_bin_occupancy": 0.833203125, "signal/brier_reward/group_std_mean": 0.16084616780281066, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015500829741358756, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015500829741358756, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031059539876878263, "signal/frontier_aurc_reward/group_bin_occupancy": 0.695703125, "signal/frontier_aurc_reward/group_std_mean": 0.005166613683104515, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5596576567040755e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5596576567040755e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16344794929027556, "signal/frontier_coverage_1/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_1/group_std_mean": 0.21142135560512543, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029257182497531174, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029257182497531174, "signal/frontier_coverage_10/centered_abs_mean": 0.16344794929027556, "signal/frontier_coverage_10/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_10/group_std_mean": 0.21142135560512543, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029257182497531174, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029257182497531174, "signal/frontier_coverage_15/centered_abs_mean": 0.16335625648498536, "signal/frontier_coverage_15/group_bin_occupancy": 0.860546875, "signal/frontier_coverage_15/group_std_mean": 0.2113026887178421, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029240769799798727, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029240769799798727, "signal/frontier_coverage_20/centered_abs_mean": 0.15308941304683685, "signal/frontier_coverage_20/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_20/group_std_mean": 0.19825595915317534, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002740300307050347, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002740300307050347, "signal/frontier_coverage_25/centered_abs_mean": 0.09508876204490661, "signal/frontier_coverage_25/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_25/group_std_mean": 0.12375225573778152, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017020887462422252, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017020887462422252, "signal/frontier_coverage_5/centered_abs_mean": 0.16344794929027556, "signal/frontier_coverage_5/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_5/group_std_mean": 0.21142135560512543, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029257182497531174, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029257182497531174, "signal/frontier_ece_reward/centered_abs_mean": 0.006654571555554867, "signal/frontier_ece_reward/group_bin_occupancy": 0.634375, "signal/frontier_ece_reward/group_std_mean": 0.008567211776971817, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008318214444443583, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008318214444443583, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2636649996042252, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3369534254074097, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03295812495052815, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03295812495052815, "step": 180 }, { "calibration/aurc": 0.2965379623783643, "calibration/batch_distribution_entropy": 0.9852064048622537, "calibration/batch_entropy_100bins": 0.9720746322070163, "calibration/batch_entropy_10bins": 0.9852064048622537, "calibration/batch_entropy_50bins": 0.980828527810799, "calibration/batch_uniqueness": 0.9545549219314161, "calibration/buffer_distribution_entropy": 0.9986850434371703, "calibration/buffer_entropy_100bins": 0.9988615389589619, "calibration/buffer_entropy_10bins": 0.9986850434371703, "calibration/buffer_entropy_50bins": 0.9989465814800556, "calibration/confidence_entropy": 0.49104615217744146, "calibration/coverage@0%": 0.0344017551369863, "calibration/coverage@1%": 0.0344017551369863, "calibration/coverage@10%": 0.20481057363013697, "calibration/coverage@15%": 0.3334041707436399, "calibration/coverage@20%": 0.46352281066536205, "calibration/coverage@25%": 0.5444043542074364, "calibration/coverage@30%": 0.6159162487769081, "calibration/coverage@5%": 0.0793503852739726, "calibration/ece": 0.12458582109149696, "calibration/mean_confidence": 0.493458359695666, "calibration/prompt_uniqueness": 0.8485828157518209, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 720.4, "completions/max_terminated_length": 523.0, "completions/mean_length": 209.785546875, "completions/mean_terminated_length": 209.65625915527343, "completions/min_length": 99.2, "completions/min_terminated_length": 99.2, "epoch": 0.592, "grad_norm": 0.0010041077621281147, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 630830114.0, "reward": 0.8443014621734619, "reward_std": 0.09108059257268905, "rewards/accuracy_reward": 0.5109375, "rewards/brier_reward": 0.7945732355117798, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003106560418382287, "rewards/frontier_coverage_1": 0.1313086122274399, "rewards/frontier_coverage_10": 0.1313086122274399, "rewards/frontier_coverage_15": 0.13125890344381333, "rewards/frontier_coverage_20": 0.12137791365385056, "rewards/frontier_coverage_25": 0.07633394300937653, "rewards/frontier_coverage_5": 0.1313086122274399, "rewards/frontier_ece_reward": 0.005067649204283953, "rewards/frontier_entropy_batch_reward": -0.19127190113067627, "signal/accuracy_reward/centered_abs_mean": 0.1004638671875, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.13193922638893127, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05023193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05023193359375, "signal/advantage_abs_mean": 0.0713750347495079, "signal/advantage_pre_scale_abs_mean": 0.0713750347495079, "signal/advantage_pre_scale_std": 0.10935924351215362, "signal/advantage_std": 0.10935924351215362, "signal/brier_reward/centered_abs_mean": 0.12105749100446701, "signal/brier_reward/group_bin_occupancy": 0.844140625, "signal/brier_reward/group_std_mean": 0.15631654858589172, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015132186375558377, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015132186375558377, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003012161422520876, "signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875, "signal/frontier_aurc_reward/group_std_mean": 0.005084943398833275, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3917687182547525e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3917687182547525e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17011265456676483, "signal/frontier_coverage_1/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_1/group_std_mean": 0.21760738790035247, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003045016434043646, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003045016434043646, "signal/frontier_coverage_10/centered_abs_mean": 0.17011265456676483, "signal/frontier_coverage_10/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_10/group_std_mean": 0.21760738790035247, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003045016434043646, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003045016434043646, "signal/frontier_coverage_15/centered_abs_mean": 0.17000848054885864, "signal/frontier_coverage_15/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_15/group_std_mean": 0.21747492849826813, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003043151693418622, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003043151693418622, "signal/frontier_coverage_20/centered_abs_mean": 0.15837956964969635, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.2029614955186844, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028349942062050105, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028349942062050105, "signal/frontier_coverage_25/centered_abs_mean": 0.09450580030679703, "signal/frontier_coverage_25/group_bin_occupancy": 0.884375, "signal/frontier_coverage_25/group_std_mean": 0.12207887768745422, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016916538355872036, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016916538355872036, "signal/frontier_coverage_5/centered_abs_mean": 0.17011265456676483, "signal/frontier_coverage_5/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_5/group_std_mean": 0.21760738790035247, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003045016434043646, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003045016434043646, "signal/frontier_ece_reward/centered_abs_mean": 0.006299029383808374, "signal/frontier_ece_reward/group_bin_occupancy": 0.619140625, "signal/frontier_ece_reward/group_std_mean": 0.008179245609790086, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007873786729760468, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007873786729760468, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26203358769416807, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33473817110061643, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03275419846177101, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03275419846177101, "step": 185 }, { "calibration/aurc": 0.23255238602768316, "calibration/batch_distribution_entropy": 0.9705031192858022, "calibration/batch_entropy_100bins": 0.9659821492408802, "calibration/batch_entropy_10bins": 0.9705031192858022, "calibration/batch_entropy_50bins": 0.9722374803618952, "calibration/batch_uniqueness": 0.951416015625, "calibration/buffer_distribution_entropy": 0.9986525167285963, "calibration/buffer_entropy_100bins": 0.998873465649002, "calibration/buffer_entropy_10bins": 0.9986525167285963, "calibration/buffer_entropy_50bins": 0.9989401550363587, "calibration/confidence_entropy": 0.4738903778069483, "calibration/coverage@0%": 0.066015625, "calibration/coverage@1%": 0.067578125, "calibration/coverage@10%": 0.28515625, "calibration/coverage@15%": 0.3921875, "calibration/coverage@20%": 0.471875, "calibration/coverage@25%": 0.570703125, "calibration/coverage@30%": 0.68125, "calibration/coverage@5%": 0.166015625, "calibration/ece": 0.09631291270696571, "calibration/mean_confidence": 0.4673449272543544, "calibration/prompt_uniqueness": 0.84677734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 756.8, "completions/max_terminated_length": 747.8, "completions/mean_length": 205.48544921875, "completions/mean_terminated_length": 205.35531005859374, "completions/min_length": 100.4, "completions/min_terminated_length": 100.4, "epoch": 0.608, "grad_norm": 0.000828551419544965, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 647933773.0, "reward": 0.8504921317100524, "reward_std": 0.08589145988225937, "rewards/accuracy_reward": 0.51875, "rewards/brier_reward": 0.8136067509651184, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002254353184252977, "rewards/frontier_coverage_1": 0.1501113146543503, "rewards/frontier_coverage_10": 0.1501113146543503, "rewards/frontier_coverage_15": 0.1499549314379692, "rewards/frontier_coverage_20": 0.13523284196853638, "rewards/frontier_coverage_25": 0.08837753832340241, "rewards/frontier_coverage_5": 0.1501113146543503, "rewards/frontier_ece_reward": 0.005275832582265138, "rewards/frontier_entropy_batch_reward": -0.20682401657104493, "signal/accuracy_reward/centered_abs_mean": 0.0917724609375, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.12107746154069901, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04588623046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04588623046875, "signal/advantage_abs_mean": 0.06623770222067833, "signal/advantage_pre_scale_abs_mean": 0.06623770222067833, "signal/advantage_pre_scale_std": 0.10089752227067947, "signal/advantage_std": 0.10089752227067947, "signal/brier_reward/centered_abs_mean": 0.11607680916786194, "signal/brier_reward/group_bin_occupancy": 0.835546875, "signal/brier_reward/group_std_mean": 0.14877235889434814, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014509601145982742, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014509601145982742, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020517975790426135, "signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375, "signal/frontier_aurc_reward/group_std_mean": 0.003500781860202551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.672717430163175e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.672717430163175e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17476378679275512, "signal/frontier_coverage_1/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_1/group_std_mean": 0.2204089343547821, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031282717362046243, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031282717362046243, "signal/frontier_coverage_10/centered_abs_mean": 0.17476378679275512, "signal/frontier_coverage_10/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_10/group_std_mean": 0.2204089343547821, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031282717362046243, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031282717362046243, "signal/frontier_coverage_15/centered_abs_mean": 0.17451978027820586, "signal/frontier_coverage_15/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_15/group_std_mean": 0.22010447680950165, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003123903926461935, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003123903926461935, "signal/frontier_coverage_20/centered_abs_mean": 0.15353093445301055, "signal/frontier_coverage_20/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_20/group_std_mean": 0.19391053915023804, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027482036035507917, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027482036035507917, "signal/frontier_coverage_25/centered_abs_mean": 0.09261592626571655, "signal/frontier_coverage_25/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_25/group_std_mean": 0.1174243450164795, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016578249633312225, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016578249633312225, "signal/frontier_coverage_5/centered_abs_mean": 0.17476378679275512, "signal/frontier_coverage_5/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_5/group_std_mean": 0.2204089343547821, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031282717362046243, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031282717362046243, "signal/frontier_ece_reward/centered_abs_mean": 0.0058389359153807165, "signal/frontier_ece_reward/group_bin_occupancy": 0.5984375, "signal/frontier_ece_reward/group_std_mean": 0.007358342409133911, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007298669894225896, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007298669894225896, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27921711802482607, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3562686026096344, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03490213975310326, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03490213975310326, "step": 190 }, { "calibration/aurc": 0.24642497306977423, "calibration/batch_distribution_entropy": 0.9901574898144636, "calibration/batch_entropy_100bins": 0.9759458013037052, "calibration/batch_entropy_10bins": 0.9901574898144636, "calibration/batch_entropy_50bins": 0.9836366792813663, "calibration/batch_uniqueness": 0.955316162109375, "calibration/buffer_distribution_entropy": 0.9987390546134606, "calibration/buffer_entropy_100bins": 0.9989018115142876, "calibration/buffer_entropy_10bins": 0.9987390546134606, "calibration/buffer_entropy_50bins": 0.9989672677498665, "calibration/confidence_entropy": 0.5117506475315936, "calibration/coverage@0%": 0.030859375, "calibration/coverage@1%": 0.030859375, "calibration/coverage@10%": 0.20859375, "calibration/coverage@15%": 0.33515625, "calibration/coverage@20%": 0.439453125, "calibration/coverage@25%": 0.546875, "calibration/coverage@30%": 0.626171875, "calibration/coverage@5%": 0.05625, "calibration/ece": 0.1060162229629203, "calibration/mean_confidence": 0.48300620790316096, "calibration/prompt_uniqueness": 0.856640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 878.8, "completions/max_terminated_length": 470.2, "completions/mean_length": 202.49697265625, "completions/mean_terminated_length": 202.23670043945313, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.624, "grad_norm": 0.0011328250402584672, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 665351246.0, "reward": 0.8511114478111267, "reward_std": 0.0926138237118721, "rewards/accuracy_reward": 0.52197265625, "rewards/brier_reward": 0.8015788435935974, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002650001086294651, "rewards/frontier_coverage_1": 0.1292887583374977, "rewards/frontier_coverage_10": 0.12927541583776475, "rewards/frontier_coverage_15": 0.1291019305586815, "rewards/frontier_coverage_20": 0.11305647492408752, "rewards/frontier_coverage_25": 0.07279382422566413, "rewards/frontier_coverage_5": 0.1292887583374977, "rewards/frontier_ece_reward": 0.004003529995679855, "rewards/frontier_entropy_batch_reward": -0.1836717516183853, "signal/accuracy_reward/centered_abs_mean": 0.103009033203125, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.13735188841819762, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515045166015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0515045166015625, "signal/advantage_abs_mean": 0.07203299552202225, "signal/advantage_pre_scale_abs_mean": 0.07203299552202225, "signal/advantage_pre_scale_std": 0.10877863466739654, "signal/advantage_std": 0.10877863466739654, "signal/brier_reward/centered_abs_mean": 0.12054053992033005, "signal/brier_reward/group_bin_occupancy": 0.860546875, "signal/brier_reward/group_std_mean": 0.15555098354816438, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015067567490041256, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015067567490041256, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023642276879400014, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125, "signal/frontier_aurc_reward/group_std_mean": 0.0038254653103649617, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2319672502344474e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2319672502344474e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1756080448627472, "signal/frontier_coverage_1/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_1/group_std_mean": 0.2242843985557556, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031433838419616224, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031433838419616224, "signal/frontier_coverage_10/centered_abs_mean": 0.1755845367908478, "signal/frontier_coverage_10/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_10/group_std_mean": 0.22425468266010284, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031429629772901533, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031429629772901533, "signal/frontier_coverage_15/centered_abs_mean": 0.17519534826278688, "signal/frontier_coverage_15/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_15/group_std_mean": 0.2237583041191101, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031359965912997724, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031359965912997724, "signal/frontier_coverage_20/centered_abs_mean": 0.14687740206718444, "signal/frontier_coverage_20/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_20/group_std_mean": 0.1877150535583496, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026291054207831623, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026291054207831623, "signal/frontier_coverage_25/centered_abs_mean": 0.08315311372280121, "signal/frontier_coverage_25/group_bin_occupancy": 0.90390625, "signal/frontier_coverage_25/group_std_mean": 0.10691126137971878, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014884406700730324, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014884406700730324, "signal/frontier_coverage_5/centered_abs_mean": 0.1756080448627472, "signal/frontier_coverage_5/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_5/group_std_mean": 0.2242843985557556, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031433838419616224, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031433838419616224, "signal/frontier_ece_reward/centered_abs_mean": 0.005254755448549986, "signal/frontier_ece_reward/group_bin_occupancy": 0.621484375, "signal/frontier_ece_reward/group_std_mean": 0.006762361247092485, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006568444310687483, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006568444310687483, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.262921079993248, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.723828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.341296112537384, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032865134999156, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032865134999156, "step": 195 }, { "calibration/aurc": 0.26412426599353245, "calibration/batch_distribution_entropy": 0.9795424548247323, "calibration/batch_entropy_100bins": 0.9705601598425802, "calibration/batch_entropy_10bins": 0.9795424548247323, "calibration/batch_entropy_50bins": 0.9781327762313137, "calibration/batch_uniqueness": 0.9532878416500845, "calibration/buffer_distribution_entropy": 0.9988867867659781, "calibration/buffer_entropy_100bins": 0.9989921477324802, "calibration/buffer_entropy_10bins": 0.9988867867659781, "calibration/buffer_entropy_50bins": 0.9990694759295478, "calibration/confidence_entropy": 0.4984931459943328, "calibration/coverage@0%": 0.05156555772994129, "calibration/coverage@1%": 0.08789368272994129, "calibration/coverage@10%": 0.2625030577299413, "calibration/coverage@15%": 0.3410431445694716, "calibration/coverage@20%": 0.3887123899217221, "calibration/coverage@25%": 0.5305237891389433, "calibration/coverage@30%": 0.6500649767612525, "calibration/coverage@5%": 0.15898743272994129, "calibration/ece": 0.17953614494430098, "calibration/mean_confidence": 0.5356850748597626, "calibration/prompt_uniqueness": 0.8523553854220864, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 933.6, "completions/max_terminated_length": 651.6, "completions/mean_length": 207.77373046875, "completions/mean_terminated_length": 207.12470397949218, "completions/min_length": 98.8, "completions/min_terminated_length": 98.8, "epoch": 0.64, "grad_norm": 0.0009690375300124288, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 682821537.0, "reward": 0.8684950113296509, "reward_std": 0.08685783594846726, "rewards/accuracy_reward": 0.5673828125, "rewards/brier_reward": 0.80077143907547, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0026358509669080375, "rewards/frontier_coverage_1": 0.09513780698180199, "rewards/frontier_coverage_10": 0.09511241912841797, "rewards/frontier_coverage_15": 0.09505020901560783, "rewards/frontier_coverage_20": 0.08291480019688606, "rewards/frontier_coverage_25": 0.05603170394897461, "rewards/frontier_coverage_5": 0.09513780698180199, "rewards/frontier_ece_reward": 0.004073908319696784, "rewards/frontier_entropy_batch_reward": -0.19807116389274598, "signal/accuracy_reward/centered_abs_mean": 0.081982421875, "signal/accuracy_reward/group_bin_occupancy": 0.1671875, "signal/accuracy_reward/group_std_mean": 0.11289723217487335, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409912109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0409912109375, "signal/advantage_abs_mean": 0.06701909229159356, "signal/advantage_pre_scale_abs_mean": 0.06701909229159356, "signal/advantage_pre_scale_std": 0.10343301296234131, "signal/advantage_std": 0.10343301296234131, "signal/brier_reward/centered_abs_mean": 0.11489285230636596, "signal/brier_reward/group_bin_occupancy": 0.845703125, "signal/brier_reward/group_std_mean": 0.14897901713848113, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014361606538295746, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014361606538295746, "signal/format_reward/centered_abs_mean": 0.00106201171875, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.002154887933284044, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000531005859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000531005859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027433151146396993, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7265625, "signal/frontier_aurc_reward/group_std_mean": 0.0047297993209213015, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.910533752990887e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.910533752990887e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14741408526897432, "signal/frontier_coverage_1/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_1/group_std_mean": 0.1911382108926773, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026387120597064495, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026387120597064495, "signal/frontier_coverage_10/centered_abs_mean": 0.14739079475402833, "signal/frontier_coverage_10/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_10/group_std_mean": 0.19110864102840425, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026382951997220515, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026382951997220515, "signal/frontier_coverage_15/centered_abs_mean": 0.146929270029068, "signal/frontier_coverage_15/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_15/group_std_mean": 0.19052064120769502, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026300338562577964, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026300338562577964, "signal/frontier_coverage_20/centered_abs_mean": 0.11997720450162888, "signal/frontier_coverage_20/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_20/group_std_mean": 0.15618555545806884, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002147591905668378, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002147591905668378, "signal/frontier_coverage_25/centered_abs_mean": 0.06797240227460861, "signal/frontier_coverage_25/group_bin_occupancy": 0.901953125, "signal/frontier_coverage_25/group_std_mean": 0.0883466050028801, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012167059583589434, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012167059583589434, "signal/frontier_coverage_5/centered_abs_mean": 0.14741408526897432, "signal/frontier_coverage_5/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_5/group_std_mean": 0.1911382108926773, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026387120597064495, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026387120597064495, "signal/frontier_ece_reward/centered_abs_mean": 0.005082414392381907, "signal/frontier_ece_reward/group_bin_occupancy": 0.602734375, "signal/frontier_ece_reward/group_std_mean": 0.006547214556485414, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006353017990477384, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006353017990477384, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2626469016075134, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3328437089920044, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03283086270093918, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03283086270093918, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.4118891587153638, "eval_calibration/batch_distribution_entropy": 0.9499651057906557, "eval_calibration/batch_entropy_100bins": 0.7164103655215839, "eval_calibration/batch_entropy_10bins": 0.9499651057906557, "eval_calibration/batch_entropy_50bins": 0.8018192330251381, "eval_calibration/batch_uniqueness": 0.90625, "eval_calibration/buffer_distribution_entropy": 0.9989505087605473, "eval_calibration/buffer_entropy_100bins": 0.9990211296746674, "eval_calibration/buffer_entropy_10bins": 0.9989505087605473, "eval_calibration/buffer_entropy_50bins": 0.9991013446788948, "eval_calibration/confidence_entropy": 0.512406467521397, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.1484375, "eval_calibration/coverage@20%": 0.3046875, "eval_calibration/coverage@25%": 0.3828125, "eval_calibration/coverage@30%": 0.4375, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.20437410058850125, "eval_calibration/mean_confidence": 0.4736161076469313, "eval_calibration/prompt_uniqueness": 0.90625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 420.0, "eval_completions/max_terminated_length": 420.0, "eval_completions/mean_length": 208.73026657104492, "eval_completions/mean_terminated_length": 208.73026657104492, "eval_completions/min_length": 124.75, "eval_completions/min_terminated_length": 124.75, "eval_loss": 0.0, "eval_num_tokens": 682821537.0, "eval_reward": 0.7161590754985809, "eval_reward_std": 0.22477618232369423, "eval_rewards/accuracy_reward": 0.44921875, "eval_rewards/brier_reward": 0.7985615581274033, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003726774360984564, "eval_rewards/frontier_coverage_1": 0.17506344616413116, "eval_rewards/frontier_coverage_10": 0.17506344616413116, "eval_rewards/frontier_coverage_15": 0.17469647899270058, "eval_rewards/frontier_coverage_20": 0.13535447604954243, "eval_rewards/frontier_coverage_25": 0.07677387073636055, "eval_rewards/frontier_coverage_5": 0.17506344616413116, "eval_rewards/frontier_ece_reward": 0.0037689171731472015, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 21.9268, "eval_samples_per_second": 22.803, "eval_signal/accuracy_reward/centered_abs_mean": 0.474609375, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4946126714348793, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2373046875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2373046875, "eval_signal/advantage_abs_mean": 0.20923983305692673, "eval_signal/advantage_pre_scale_abs_mean": 0.20923983305692673, "eval_signal/advantage_pre_scale_std": 0.22233285754919052, "eval_signal/advantage_std": 0.22233285754919052, "eval_signal/brier_reward/centered_abs_mean": 0.182050883769989, "eval_signal/brier_reward/group_bin_occupancy": 0.8828125, "eval_signal/brier_reward/group_std_mean": 0.23487457260489464, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022756360471248627, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022756360471248627, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00498336530290544, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6171875, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009891956811770797, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.920223444874864e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.920223444874864e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3510345071554184, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.4272778555750847, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006283517461270094, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006283517461270094, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3510345071554184, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.4272778555750847, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006283517461270094, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006283517461270094, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3502937853336334, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_15/group_std_mean": 0.42644187808036804, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0062702588038519025, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0062702588038519025, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2606714144349098, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_20/group_std_mean": 0.32316891103982925, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004666018299758434, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004666018299758434, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.12922955304384232, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.16818556562066078, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023132089991122484, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023132089991122484, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3510345071554184, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.4272778555750847, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006283517461270094, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006283517461270094, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006779930088669062, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.859375, "eval_signal/frontier_ece_reward/group_std_mean": 0.0086629968136549, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008474912610836327, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008474912610836327, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.182, "step": 200 }, { "calibration/aurc": 0.4163644231985885, "calibration/batch_distribution_entropy": 0.9749810507515579, "calibration/batch_entropy_100bins": 0.966999722833614, "calibration/batch_entropy_10bins": 0.9749810507515579, "calibration/batch_entropy_50bins": 0.9740783954275158, "calibration/batch_uniqueness": 0.9524871826171875, "calibration/buffer_distribution_entropy": 0.9990417591124992, "calibration/buffer_entropy_100bins": 0.9990906778413995, "calibration/buffer_entropy_10bins": 0.9990417591124992, "calibration/buffer_entropy_50bins": 0.9991687120016991, "calibration/confidence_entropy": 0.5268865931440982, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.009375, "calibration/coverage@15%": 0.014453125, "calibration/coverage@20%": 0.0734375, "calibration/coverage@25%": 0.187109375, "calibration/coverage@30%": 0.301953125, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.09387041510133062, "calibration/mean_confidence": 0.47938372087901754, "calibration/prompt_uniqueness": 0.85546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 676.0, "completions/max_terminated_length": 459.2, "completions/mean_length": 204.43974609375, "completions/mean_terminated_length": 204.30895385742187, "completions/min_length": 99.8, "completions/min_terminated_length": 99.8, "epoch": 0.656, "grad_norm": 0.001068526296876371, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 699771544.0, "reward": 0.8357076287269593, "reward_std": 0.09418870508670807, "rewards/accuracy_reward": 0.49921875, "rewards/brier_reward": 0.7890438675880432, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003022062359377742, "rewards/frontier_coverage_1": 0.1237358182668686, "rewards/frontier_coverage_10": 0.1237358182668686, "rewards/frontier_coverage_15": 0.12354113608598709, "rewards/frontier_coverage_20": 0.09611983597278595, "rewards/frontier_coverage_25": 0.058796758949756625, "rewards/frontier_coverage_5": 0.1237358182668686, "rewards/frontier_ece_reward": 0.0032737540546804666, "rewards/frontier_entropy_batch_reward": -0.19495902359485626, "signal/accuracy_reward/centered_abs_mean": 0.09501953125, "signal/accuracy_reward/group_bin_occupancy": 0.1734375, "signal/accuracy_reward/group_std_mean": 0.1296718657016754, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047509765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047509765625, "signal/advantage_abs_mean": 0.07281130701303482, "signal/advantage_pre_scale_abs_mean": 0.07281130701303482, "signal/advantage_pre_scale_std": 0.11112865060567856, "signal/advantage_std": 0.11112865060567856, "signal/brier_reward/centered_abs_mean": 0.12090405225753784, "signal/brier_reward/group_bin_occupancy": 0.863671875, "signal/brier_reward/group_std_mean": 0.15587877333164216, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01511300653219223, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01511300653219223, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002597982669249177, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875, "signal/frontier_aurc_reward/group_std_mean": 0.0043062128126621245, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6503888734150677e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6503888734150677e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15984987318515778, "signal/frontier_coverage_1/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_1/group_std_mean": 0.20615570545196532, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002861312637105584, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002861312637105584, "signal/frontier_coverage_10/centered_abs_mean": 0.15984987318515778, "signal/frontier_coverage_10/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_10/group_std_mean": 0.20615570545196532, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002861312637105584, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002861312637105584, "signal/frontier_coverage_15/centered_abs_mean": 0.1595711052417755, "signal/frontier_coverage_15/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_15/group_std_mean": 0.205799001455307, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002856322703883052, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002856322703883052, "signal/frontier_coverage_20/centered_abs_mean": 0.11935619711875915, "signal/frontier_coverage_20/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_20/group_std_mean": 0.15463128089904785, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002136475685983896, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002136475685983896, "signal/frontier_coverage_25/centered_abs_mean": 0.06763988435268402, "signal/frontier_coverage_25/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_25/group_std_mean": 0.0881109967827797, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012107538990676404, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012107538990676404, "signal/frontier_coverage_5/centered_abs_mean": 0.15984987318515778, "signal/frontier_coverage_5/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_5/group_std_mean": 0.20615570545196532, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002861312637105584, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002861312637105584, "signal/frontier_ece_reward/centered_abs_mean": 0.004474427737295628, "signal/frontier_ece_reward/group_bin_occupancy": 0.625390625, "signal/frontier_ece_reward/group_std_mean": 0.005852994229644537, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005593034671619535, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005593034671619535, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2663916915655136, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34035165309906007, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0332989614456892, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0332989614456892, "step": 205 }, { "calibration/aurc": 0.28483097130697954, "calibration/batch_distribution_entropy": 0.9815272458034034, "calibration/batch_entropy_100bins": 0.9706086431974328, "calibration/batch_entropy_10bins": 0.9815272458034034, "calibration/batch_entropy_50bins": 0.9791595614288557, "calibration/batch_uniqueness": 0.953887939453125, "calibration/buffer_distribution_entropy": 0.9991894083094129, "calibration/buffer_entropy_100bins": 0.9991681415606204, "calibration/buffer_entropy_10bins": 0.9991894083094129, "calibration/buffer_entropy_50bins": 0.9992375331871519, "calibration/confidence_entropy": 0.5007356086799657, "calibration/coverage@0%": 0.0390625, "calibration/coverage@1%": 0.0390625, "calibration/coverage@10%": 0.17421875, "calibration/coverage@15%": 0.223828125, "calibration/coverage@20%": 0.317578125, "calibration/coverage@25%": 0.3875, "calibration/coverage@30%": 0.490625, "calibration/coverage@5%": 0.073046875, "calibration/ece": 0.11163271809879152, "calibration/mean_confidence": 0.48953429901749335, "calibration/prompt_uniqueness": 0.85048828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 885.8, "completions/max_terminated_length": 472.8, "completions/mean_length": 202.37763671875, "completions/mean_terminated_length": 202.11712341308595, "completions/min_length": 93.8, "completions/min_terminated_length": 93.8, "epoch": 0.672, "grad_norm": 0.0008770785643719137, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 716757331.0, "reward": 0.8457530260086059, "reward_std": 0.08760204017162324, "rewards/accuracy_reward": 0.51572265625, "rewards/brier_reward": 0.799196469783783, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027179742231965066, "rewards/frontier_coverage_1": 0.13748425543308257, "rewards/frontier_coverage_10": 0.13748425543308257, "rewards/frontier_coverage_15": 0.13730760663747787, "rewards/frontier_coverage_20": 0.10792672708630562, "rewards/frontier_coverage_25": 0.06943527311086654, "rewards/frontier_coverage_5": 0.13748425543308257, "rewards/frontier_ece_reward": 0.003610169980674982, "rewards/frontier_entropy_batch_reward": -0.20262654721736909, "signal/accuracy_reward/centered_abs_mean": 0.102716064453125, "signal/accuracy_reward/group_bin_occupancy": 0.169140625, "signal/accuracy_reward/group_std_mean": 0.131059630215168, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0513580322265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0513580322265625, "signal/advantage_abs_mean": 0.06931476294994354, "signal/advantage_pre_scale_abs_mean": 0.06931476294994354, "signal/advantage_pre_scale_std": 0.1052887812256813, "signal/advantage_std": 0.1052887812256813, "signal/brier_reward/centered_abs_mean": 0.1212724655866623, "signal/brier_reward/group_bin_occupancy": 0.839453125, "signal/brier_reward/group_std_mean": 0.15539450347423553, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015159058198332787, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015159058198332787, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024694956839084624, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71484375, "signal/frontier_aurc_reward/group_std_mean": 0.004247998539358378, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.420397090143524e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.420397090143524e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1761551171541214, "signal/frontier_coverage_1/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_1/group_std_mean": 0.2236780822277069, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003153176372870803, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003153176372870803, "signal/frontier_coverage_10/centered_abs_mean": 0.1761551171541214, "signal/frontier_coverage_10/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_10/group_std_mean": 0.2236780822277069, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003153176372870803, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003153176372870803, "signal/frontier_coverage_15/centered_abs_mean": 0.17571605741977692, "signal/frontier_coverage_15/group_bin_occupancy": 0.8625, "signal/frontier_coverage_15/group_std_mean": 0.22313523888587952, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031453173141926527, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031453173141926527, "signal/frontier_coverage_20/centered_abs_mean": 0.1281582921743393, "signal/frontier_coverage_20/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_20/group_std_mean": 0.1638483375310898, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022940333001315594, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022940333001315594, "signal/frontier_coverage_25/centered_abs_mean": 0.07442445904016495, "signal/frontier_coverage_25/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_25/group_std_mean": 0.09590905010700226, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013321977807208897, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013321977807208897, "signal/frontier_coverage_5/centered_abs_mean": 0.1761551171541214, "signal/frontier_coverage_5/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_5/group_std_mean": 0.2236780822277069, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003153176372870803, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003153176372870803, "signal/frontier_ece_reward/centered_abs_mean": 0.004578849300742149, "signal/frontier_ece_reward/group_bin_occupancy": 0.598046875, "signal/frontier_ece_reward/group_std_mean": 0.005887005571275949, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005723561625927687, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005723561625927687, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2648188531398773, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.721875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34055405855178833, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03310235664248466, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03310235664248466, "step": 210 }, { "calibration/aurc": 0.34530715915389093, "calibration/batch_distribution_entropy": 0.9766022674114094, "calibration/batch_entropy_100bins": 0.9668047432555472, "calibration/batch_entropy_10bins": 0.9766022674114094, "calibration/batch_entropy_50bins": 0.9743620147547288, "calibration/batch_uniqueness": 0.952294921875, "calibration/buffer_distribution_entropy": 0.9992386461143514, "calibration/buffer_entropy_100bins": 0.9992085238697724, "calibration/buffer_entropy_10bins": 0.9992386461143514, "calibration/buffer_entropy_50bins": 0.9992727921396962, "calibration/confidence_entropy": 0.5042982261283598, "calibration/coverage@0%": 0.0109375, "calibration/coverage@1%": 0.0109375, "calibration/coverage@10%": 0.091796875, "calibration/coverage@15%": 0.215234375, "calibration/coverage@20%": 0.3265625, "calibration/coverage@25%": 0.392578125, "calibration/coverage@30%": 0.5296875, "calibration/coverage@5%": 0.0171875, "calibration/ece": 0.12370065837025182, "calibration/mean_confidence": 0.49322062629819213, "calibration/prompt_uniqueness": 0.851513671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.4, "completions/max_terminated_length": 444.4, "completions/mean_length": 202.21005859375, "completions/mean_terminated_length": 202.21005859375, "completions/min_length": 95.4, "completions/min_terminated_length": 95.4, "epoch": 0.688, "grad_norm": 0.001138906111009419, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 733781882.0, "reward": 0.8525039792060852, "reward_std": 0.08989884555339814, "rewards/accuracy_reward": 0.5345703125, "rewards/brier_reward": 0.7973044633865356, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002551911678165197, "rewards/frontier_coverage_1": 0.11433582976460457, "rewards/frontier_coverage_10": 0.11433582976460457, "rewards/frontier_coverage_15": 0.11383322924375534, "rewards/frontier_coverage_20": 0.08399821668863297, "rewards/frontier_coverage_25": 0.054333243519067764, "rewards/frontier_coverage_5": 0.11433582976460457, "rewards/frontier_ece_reward": 0.003297937847673893, "rewards/frontier_entropy_batch_reward": -0.20293367207050322, "signal/accuracy_reward/centered_abs_mean": 0.100048828125, "signal/accuracy_reward/group_bin_occupancy": 0.173046875, "signal/accuracy_reward/group_std_mean": 0.13349073976278306, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0500244140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0500244140625, "signal/advantage_abs_mean": 0.06954181641340255, "signal/advantage_pre_scale_abs_mean": 0.06954181641340255, "signal/advantage_pre_scale_std": 0.10719988644123077, "signal/advantage_std": 0.10719988644123077, "signal/brier_reward/centered_abs_mean": 0.11443150490522384, "signal/brier_reward/group_bin_occupancy": 0.85390625, "signal/brier_reward/group_std_mean": 0.1465883433818817, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01430393811315298, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01430393811315298, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002246859250590205, "signal/frontier_aurc_reward/group_bin_occupancy": 0.731640625, "signal/frontier_aurc_reward/group_std_mean": 0.0038079099263995885, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0218777576228605e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0218777576228605e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1653559386730194, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.21095694303512574, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002959871245548129, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002959871245548129, "signal/frontier_coverage_10/centered_abs_mean": 0.1653559386730194, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.21095694303512574, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002959871245548129, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002959871245548129, "signal/frontier_coverage_15/centered_abs_mean": 0.1643199324607849, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.20968802869319916, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029413266573101284, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029413266573101284, "signal/frontier_coverage_20/centered_abs_mean": 0.11405820548534393, "signal/frontier_coverage_20/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_20/group_std_mean": 0.14639358520507811, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020416418788954615, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020416418788954615, "signal/frontier_coverage_25/centered_abs_mean": 0.0672955259680748, "signal/frontier_coverage_25/group_bin_occupancy": 0.915234375, "signal/frontier_coverage_25/group_std_mean": 0.08634553998708724, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012045898474752903, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012045898474752903, "signal/frontier_coverage_5/centered_abs_mean": 0.1653559386730194, "signal/frontier_coverage_5/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_5/group_std_mean": 0.21095694303512574, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002959871245548129, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002959871245548129, "signal/frontier_ece_reward/centered_abs_mean": 0.0043392408639192585, "signal/frontier_ece_reward/group_bin_occupancy": 0.6078125, "signal/frontier_ece_reward/group_std_mean": 0.005523344594985246, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005424051079899073, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005424051079899073, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2766480267047882, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3514810025691986, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03458100333809853, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03458100333809853, "step": 215 }, { "calibration/aurc": 0.26433509062939187, "calibration/batch_distribution_entropy": 0.978364776360986, "calibration/batch_entropy_100bins": 0.9685694678027698, "calibration/batch_entropy_10bins": 0.978364776360986, "calibration/batch_entropy_50bins": 0.9765817547053196, "calibration/batch_uniqueness": 0.9531219482421875, "calibration/buffer_distribution_entropy": 0.9992599704743942, "calibration/buffer_entropy_100bins": 0.999201034721138, "calibration/buffer_entropy_10bins": 0.9992599704743942, "calibration/buffer_entropy_50bins": 0.999264548139028, "calibration/confidence_entropy": 0.48513173016721006, "calibration/coverage@0%": 0.012890625, "calibration/coverage@1%": 0.012890625, "calibration/coverage@10%": 0.069921875, "calibration/coverage@15%": 0.165625, "calibration/coverage@20%": 0.28046875, "calibration/coverage@25%": 0.505859375, "calibration/coverage@30%": 0.727734375, "calibration/coverage@5%": 0.025, "calibration/ece": 0.0904218188804096, "calibration/mean_confidence": 0.5354777583629338, "calibration/prompt_uniqueness": 0.843310546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 639.6, "completions/max_terminated_length": 421.0, "completions/mean_length": 197.9205078125, "completions/mean_terminated_length": 197.79017944335936, "completions/min_length": 93.2, "completions/min_terminated_length": 93.2, "epoch": 0.704, "grad_norm": 0.0008411157759837806, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 750674732.0, "reward": 0.8568742513656616, "reward_std": 0.09010809510946274, "rewards/accuracy_reward": 0.53876953125, "rewards/brier_reward": 0.8050263285636902, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0029557050904259084, "rewards/frontier_coverage_1": 0.11192709654569626, "rewards/frontier_coverage_10": 0.11192709654569626, "rewards/frontier_coverage_15": 0.11145668923854828, "rewards/frontier_coverage_20": 0.08219068348407746, "rewards/frontier_coverage_25": 0.055270007252693175, "rewards/frontier_coverage_5": 0.11192709654569626, "rewards/frontier_ece_reward": 0.0034165045712143184, "rewards/frontier_entropy_batch_reward": -0.1914419263601303, "signal/accuracy_reward/centered_abs_mean": 0.092047119140625, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.1208561822772026, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0460235595703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0460235595703125, "signal/advantage_abs_mean": 0.07105211615562439, "signal/advantage_pre_scale_abs_mean": 0.07105211615562439, "signal/advantage_pre_scale_std": 0.10825964659452439, "signal/advantage_std": 0.10825964659452439, "signal/brier_reward/centered_abs_mean": 0.11719977408647538, "signal/brier_reward/group_bin_occupancy": 0.84921875, "signal/brier_reward/group_std_mean": 0.15045890510082244, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014649971760809422, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014649971760809422, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028852388728410005, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625, "signal/frontier_aurc_reward/group_std_mean": 0.004860749281942844, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1645773783093316e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1645773783093316e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1517467588186264, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.1936959743499756, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027162669226527213, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027162669226527213, "signal/frontier_coverage_10/centered_abs_mean": 0.1517467588186264, "signal/frontier_coverage_10/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_10/group_std_mean": 0.1936959743499756, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027162669226527213, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027162669226527213, "signal/frontier_coverage_15/centered_abs_mean": 0.15077128112316132, "signal/frontier_coverage_15/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_15/group_std_mean": 0.1924582153558731, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026988058350980284, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026988058350980284, "signal/frontier_coverage_20/centered_abs_mean": 0.09997670203447342, "signal/frontier_coverage_20/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_20/group_std_mean": 0.1284783437848091, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017895829398185015, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017895829398185015, "signal/frontier_coverage_25/centered_abs_mean": 0.06153928935527801, "signal/frontier_coverage_25/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_25/group_std_mean": 0.07910384982824326, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011015532538294793, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011015532538294793, "signal/frontier_coverage_5/centered_abs_mean": 0.1517467588186264, "signal/frontier_coverage_5/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_5/group_std_mean": 0.1936959743499756, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027162669226527213, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027162669226527213, "signal/frontier_ece_reward/centered_abs_mean": 0.004455319605767727, "signal/frontier_ece_reward/group_bin_occupancy": 0.625, "signal/frontier_ece_reward/group_std_mean": 0.005671911407262087, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005569149507209659, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005569149507209659, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26655210852622985, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34124083518981935, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03331901356577873, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03331901356577873, "step": 220 }, { "calibration/aurc": 0.2571725886157596, "calibration/batch_distribution_entropy": 0.9868377117637062, "calibration/batch_entropy_100bins": 0.9723589233248291, "calibration/batch_entropy_10bins": 0.9868377117637062, "calibration/batch_entropy_50bins": 0.9812132345480924, "calibration/batch_uniqueness": 0.9542327880859375, "calibration/buffer_distribution_entropy": 0.999290995119787, "calibration/buffer_entropy_100bins": 0.9992192079587326, "calibration/buffer_entropy_10bins": 0.999290995119787, "calibration/buffer_entropy_50bins": 0.9992962849096898, "calibration/confidence_entropy": 0.4974882722837635, "calibration/coverage@0%": 0.0703125, "calibration/coverage@1%": 0.09921875, "calibration/coverage@10%": 0.24921875, "calibration/coverage@15%": 0.331640625, "calibration/coverage@20%": 0.378125, "calibration/coverage@25%": 0.46640625, "calibration/coverage@30%": 0.598046875, "calibration/coverage@5%": 0.165234375, "calibration/ece": 0.12935237041983996, "calibration/mean_confidence": 0.5272114849619792, "calibration/prompt_uniqueness": 0.843505859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.6, "completions/max_terminated_length": 385.6, "completions/mean_length": 191.19853515625, "completions/mean_terminated_length": 191.19853515625, "completions/min_length": 92.4, "completions/min_terminated_length": 92.4, "epoch": 0.72, "grad_norm": 0.0011021590325981379, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 767642461.0, "reward": 0.8635419249534607, "reward_std": 0.0918789803981781, "rewards/accuracy_reward": 0.55908203125, "rewards/brier_reward": 0.8094393730163574, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002607145463116467, "rewards/frontier_coverage_1": 0.10515292584896088, "rewards/frontier_coverage_10": 0.10515292584896088, "rewards/frontier_coverage_15": 0.10439955592155456, "rewards/frontier_coverage_20": 0.0752902314066887, "rewards/frontier_coverage_25": 0.053488964587450026, "rewards/frontier_coverage_5": 0.10515292584896088, "rewards/frontier_ece_reward": 0.00315559939481318, "rewards/frontier_entropy_batch_reward": -0.2187791347503662, "signal/accuracy_reward/centered_abs_mean": 0.093731689453125, "signal/accuracy_reward/group_bin_occupancy": 0.171484375, "signal/accuracy_reward/group_std_mean": 0.12776783406734465, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468658447265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0468658447265625, "signal/advantage_abs_mean": 0.07090412229299545, "signal/advantage_pre_scale_abs_mean": 0.07090412229299545, "signal/advantage_pre_scale_std": 0.1082430675625801, "signal/advantage_std": 0.1082430675625801, "signal/brier_reward/centered_abs_mean": 0.10838331580162049, "signal/brier_reward/group_bin_occupancy": 0.851953125, "signal/brier_reward/group_std_mean": 0.14058519005775452, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01354791447520256, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01354791447520256, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024020272307097913, "signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125, "signal/frontier_aurc_reward/group_std_mean": 0.00396113651804626, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2996287811547516e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2996287811547516e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14759007096290588, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.19091827869415284, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002641862211748958, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002641862211748958, "signal/frontier_coverage_10/centered_abs_mean": 0.14759007096290588, "signal/frontier_coverage_10/group_bin_occupancy": 0.875, "signal/frontier_coverage_10/group_std_mean": 0.19091827869415284, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002641862211748958, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641862211748958, "signal/frontier_coverage_15/centered_abs_mean": 0.1430205911397934, "signal/frontier_coverage_15/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_15/group_std_mean": 0.1849964141845703, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025600686203688383, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025600686203688383, "signal/frontier_coverage_20/centered_abs_mean": 0.09058674424886703, "signal/frontier_coverage_20/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_20/group_std_mean": 0.11796402931213379, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016215026378631591, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016215026378631591, "signal/frontier_coverage_25/centered_abs_mean": 0.056336529552936554, "signal/frontier_coverage_25/group_bin_occupancy": 0.923046875, "signal/frontier_coverage_25/group_std_mean": 0.07268869429826737, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010084238601848483, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010084238601848483, "signal/frontier_coverage_5/centered_abs_mean": 0.14759007096290588, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.19091827869415284, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002641862211748958, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002641862211748958, "signal/frontier_ece_reward/centered_abs_mean": 0.00401081838645041, "signal/frontier_ece_reward/group_bin_occupancy": 0.633203125, "signal/frontier_ece_reward/group_std_mean": 0.0050904926843941215, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005013522983063013, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005013522983063013, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2880380153656006, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35947364568710327, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03600475192070007, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03600475192070007, "step": 225 }, { "calibration/aurc": 0.2671787874657409, "calibration/batch_distribution_entropy": 0.9745606845119091, "calibration/batch_entropy_100bins": 0.9669656345831037, "calibration/batch_entropy_10bins": 0.9745606845119091, "calibration/batch_entropy_50bins": 0.974810522331128, "calibration/batch_uniqueness": 0.952349853515625, "calibration/buffer_distribution_entropy": 0.9992588263367409, "calibration/buffer_entropy_100bins": 0.9992122785372862, "calibration/buffer_entropy_10bins": 0.9992588263367409, "calibration/buffer_entropy_50bins": 0.9993063227006909, "calibration/confidence_entropy": 0.4825361471276001, "calibration/coverage@0%": 0.01875, "calibration/coverage@1%": 0.01875, "calibration/coverage@10%": 0.08828125, "calibration/coverage@15%": 0.25625, "calibration/coverage@20%": 0.40859375, "calibration/coverage@25%": 0.503125, "calibration/coverage@30%": 0.637109375, "calibration/coverage@5%": 0.023828125, "calibration/ece": 0.12239777459769277, "calibration/mean_confidence": 0.5535488767952312, "calibration/prompt_uniqueness": 0.839697265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.8, "completions/max_terminated_length": 415.8, "completions/mean_length": 190.82607421875, "completions/mean_terminated_length": 190.82607421875, "completions/min_length": 92.6, "completions/min_terminated_length": 92.6, "epoch": 0.736, "grad_norm": 0.0011723111383616924, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 784536104.0, "reward": 0.8648198366165161, "reward_std": 0.09010217189788819, "rewards/accuracy_reward": 0.55693359375, "rewards/brier_reward": 0.8001532912254333, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0029186454601585867, "rewards/frontier_coverage_1": 0.10427813678979873, "rewards/frontier_coverage_10": 0.10427813678979873, "rewards/frontier_coverage_15": 0.09994309544563293, "rewards/frontier_coverage_20": 0.07507269382476807, "rewards/frontier_coverage_25": 0.05625101327896118, "rewards/frontier_coverage_5": 0.10427813678979873, "rewards/frontier_ece_reward": 0.0028485337272286413, "rewards/frontier_entropy_batch_reward": -0.1892842948436737, "signal/accuracy_reward/centered_abs_mean": 0.097454833984375, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.12492033690214158, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487274169921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0487274169921875, "signal/advantage_abs_mean": 0.07159559726715088, "signal/advantage_pre_scale_abs_mean": 0.07159559726715088, "signal/advantage_pre_scale_std": 0.10877462178468704, "signal/advantage_std": 0.10877462178468704, "signal/brier_reward/centered_abs_mean": 0.11685294806957244, "signal/brier_reward/group_bin_occupancy": 0.8484375, "signal/brier_reward/group_std_mean": 0.14845768213272095, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014606618508696555, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014606618508696555, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027471881825476886, "signal/frontier_aurc_reward/group_bin_occupancy": 0.741015625, "signal/frontier_aurc_reward/group_std_mean": 0.004431968554854393, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.917466576443985e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.917466576443985e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15529634058475494, "signal/frontier_coverage_1/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_1/group_std_mean": 0.1962975323200226, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027798044495284556, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027798044495284556, "signal/frontier_coverage_10/centered_abs_mean": 0.15529634058475494, "signal/frontier_coverage_10/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_10/group_std_mean": 0.1962975323200226, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027798044495284556, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027798044495284556, "signal/frontier_coverage_15/centered_abs_mean": 0.14664601981639863, "signal/frontier_coverage_15/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_15/group_std_mean": 0.1854826033115387, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002624963456764817, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002624963456764817, "signal/frontier_coverage_20/centered_abs_mean": 0.09293387830257416, "signal/frontier_coverage_20/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_20/group_std_mean": 0.11852222084999084, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016635163454338908, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016635163454338908, "signal/frontier_coverage_25/centered_abs_mean": 0.0600375160574913, "signal/frontier_coverage_25/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_25/group_std_mean": 0.07639677226543426, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010746715241111815, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010746715241111815, "signal/frontier_coverage_5/centered_abs_mean": 0.15529634058475494, "signal/frontier_coverage_5/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_5/group_std_mean": 0.1962975323200226, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027798044495284556, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027798044495284556, "signal/frontier_ece_reward/centered_abs_mean": 0.00405830298550427, "signal/frontier_ece_reward/group_bin_occupancy": 0.619140625, "signal/frontier_ece_reward/group_std_mean": 0.005047469865530729, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005072878731880337, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005072878731880337, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2611878842115402, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.333402281999588, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032648485526442526, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032648485526442526, "step": 230 }, { "calibration/aurc": 0.268875945135634, "calibration/batch_distribution_entropy": 0.9749735442052577, "calibration/batch_entropy_100bins": 0.9643205175783025, "calibration/batch_entropy_10bins": 0.9749735442052577, "calibration/batch_entropy_50bins": 0.9732517019604874, "calibration/batch_uniqueness": 0.9509182219542363, "calibration/buffer_distribution_entropy": 0.999202156889033, "calibration/buffer_entropy_100bins": 0.9992126409759396, "calibration/buffer_entropy_10bins": 0.999202156889033, "calibration/buffer_entropy_50bins": 0.999297391738472, "calibration/confidence_entropy": 0.4768481504008153, "calibration/coverage@0%": 0.03398819716242661, "calibration/coverage@1%": 0.03398819716242661, "calibration/coverage@10%": 0.09805069716242662, "calibration/coverage@15%": 0.2628944471624266, "calibration/coverage@20%": 0.36172333659491196, "calibration/coverage@25%": 0.5231508378180039, "calibration/coverage@30%": 0.6325824058219178, "calibration/coverage@5%": 0.05586319716242662, "calibration/ece": 0.1138435006980445, "calibration/mean_confidence": 0.4696731800362023, "calibration/prompt_uniqueness": 0.8447302207986473, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/max_terminated_length": 432.0, "completions/mean_length": 196.7138671875, "completions/mean_terminated_length": 196.7138671875, "completions/min_length": 107.6, "completions/min_terminated_length": 107.6, "epoch": 0.752, "grad_norm": 0.0009408697951585054, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 801777654.0, "reward": 0.8604554295539856, "reward_std": 0.08923238068819046, "rewards/accuracy_reward": 0.54912109375, "rewards/brier_reward": 0.8006747007369995, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0029859797097742556, "rewards/frontier_coverage_1": 0.11275802329182624, "rewards/frontier_coverage_10": 0.11275802329182624, "rewards/frontier_coverage_15": 0.10675515756011009, "rewards/frontier_coverage_20": 0.07440133690834046, "rewards/frontier_coverage_25": 0.05188974887132645, "rewards/frontier_coverage_5": 0.11275802329182624, "rewards/frontier_ece_reward": 0.002763616549782455, "rewards/frontier_entropy_batch_reward": -0.19688346982002258, "signal/accuracy_reward/centered_abs_mean": 0.090399169921875, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12397283762693405, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451995849609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451995849609375, "signal/advantage_abs_mean": 0.06917839050292969, "signal/advantage_pre_scale_abs_mean": 0.06917839050292969, "signal/advantage_pre_scale_std": 0.10713021010160446, "signal/advantage_std": 0.10713021010160446, "signal/brier_reward/centered_abs_mean": 0.11189695447683334, "signal/brier_reward/group_bin_occupancy": 0.845703125, "signal/brier_reward/group_std_mean": 0.1444198101758957, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013987119309604168, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013987119309604168, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002711809379979968, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875, "signal/frontier_aurc_reward/group_std_mean": 0.004320217343047261, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.854138751397841e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.854138751397841e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14956962913274766, "signal/frontier_coverage_1/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_1/group_std_mean": 0.19387493133544922, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002677296195179224, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002677296195179224, "signal/frontier_coverage_10/centered_abs_mean": 0.14956962913274766, "signal/frontier_coverage_10/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_10/group_std_mean": 0.19387493133544922, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002677296195179224, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002677296195179224, "signal/frontier_coverage_15/centered_abs_mean": 0.14117977619171143, "signal/frontier_coverage_15/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_15/group_std_mean": 0.18318403959274293, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002527117915451527, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002527117915451527, "signal/frontier_coverage_20/centered_abs_mean": 0.08791815936565399, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.11480976194143296, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001573735009878874, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001573735009878874, "signal/frontier_coverage_25/centered_abs_mean": 0.05685350224375725, "signal/frontier_coverage_25/group_bin_occupancy": 0.923828125, "signal/frontier_coverage_25/group_std_mean": 0.07329353988170624, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010176776675507426, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010176776675507426, "signal/frontier_coverage_5/centered_abs_mean": 0.14956962913274766, "signal/frontier_coverage_5/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_5/group_std_mean": 0.19387493133544922, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002677296195179224, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002677296195179224, "signal/frontier_ece_reward/centered_abs_mean": 0.0038073719944804905, "signal/frontier_ece_reward/group_bin_occupancy": 0.6296875, "signal/frontier_ece_reward/group_std_mean": 0.004847258795052767, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004759214993100613, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004759214993100613, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709112524986267, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3456527829170227, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033863906562328336, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033863906562328336, "step": 235 }, { "calibration/aurc": 0.2740796036833044, "calibration/batch_distribution_entropy": 0.9818428608553773, "calibration/batch_entropy_100bins": 0.9719007710164602, "calibration/batch_entropy_10bins": 0.9818428608553773, "calibration/batch_entropy_50bins": 0.9787606167041576, "calibration/batch_uniqueness": 0.9538522404981341, "calibration/buffer_distribution_entropy": 0.9993119692396348, "calibration/buffer_entropy_100bins": 0.9992856717754369, "calibration/buffer_entropy_10bins": 0.9993119692396348, "calibration/buffer_entropy_50bins": 0.9993856639865714, "calibration/confidence_entropy": 0.5110251790731171, "calibration/coverage@0%": 0.07266542318982387, "calibration/coverage@1%": 0.07305604818982388, "calibration/coverage@10%": 0.23283543297455972, "calibration/coverage@15%": 0.31722342832681016, "calibration/coverage@20%": 0.386768438111546, "calibration/coverage@25%": 0.4594606164383562, "calibration/coverage@30%": 0.5407473091976517, "calibration/coverage@5%": 0.17580112524461838, "calibration/ece": 0.1521925787277262, "calibration/mean_confidence": 0.4857958555314106, "calibration/prompt_uniqueness": 0.859637178069719, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 676.6, "completions/max_terminated_length": 469.4, "completions/mean_length": 211.4650390625, "completions/mean_terminated_length": 211.33624572753905, "completions/min_length": 108.2, "completions/min_terminated_length": 108.2, "epoch": 0.768, "grad_norm": 0.0010750554502010345, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 818875760.0, "reward": 0.8467617988586426, "reward_std": 0.0898010030388832, "rewards/accuracy_reward": 0.51259765625, "rewards/brier_reward": 0.8113283753395081, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002595884189940989, "rewards/frontier_coverage_1": 0.1418531656265259, "rewards/frontier_coverage_10": 0.1418424904346466, "rewards/frontier_coverage_15": 0.1365887075662613, "rewards/frontier_coverage_20": 0.09369135200977326, "rewards/frontier_coverage_25": 0.060536155849695204, "rewards/frontier_coverage_5": 0.1418531656265259, "rewards/frontier_ece_reward": 0.002913234336301684, "rewards/frontier_entropy_batch_reward": -0.19235891699790955, "signal/accuracy_reward/centered_abs_mean": 0.093255615234375, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.12222997695207596, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0466278076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0466278076171875, "signal/advantage_abs_mean": 0.0705685243010521, "signal/advantage_pre_scale_abs_mean": 0.0705685243010521, "signal/advantage_pre_scale_std": 0.1090763971209526, "signal/advantage_std": 0.1090763971209526, "signal/brier_reward/centered_abs_mean": 0.11311222910881043, "signal/brier_reward/group_bin_occupancy": 0.855078125, "signal/brier_reward/group_std_mean": 0.14575394093990326, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014139028638601303, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014139028638601303, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022981606656685472, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625, "signal/frontier_aurc_reward/group_std_mean": 0.0038101823534816503, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.113707545911893e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.113707545911893e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15940954387187958, "signal/frontier_coverage_1/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_1/group_std_mean": 0.20271311998367308, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002853430714458227, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002853430714458227, "signal/frontier_coverage_10/centered_abs_mean": 0.15939579010009766, "signal/frontier_coverage_10/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_10/group_std_mean": 0.20269620716571807, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002853184659034014, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002853184659034014, "signal/frontier_coverage_15/centered_abs_mean": 0.14816523492336273, "signal/frontier_coverage_15/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_15/group_std_mean": 0.18849847018718718, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026521575171500446, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026521575171500446, "signal/frontier_coverage_20/centered_abs_mean": 0.09317153096199035, "signal/frontier_coverage_20/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_20/group_std_mean": 0.11924822032451629, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016677704174071551, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016677704174071551, "signal/frontier_coverage_25/centered_abs_mean": 0.05879691541194916, "signal/frontier_coverage_25/group_bin_occupancy": 0.9234375, "signal/frontier_coverage_25/group_std_mean": 0.07511676400899887, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010524647310376166, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010524647310376166, "signal/frontier_coverage_5/centered_abs_mean": 0.15940954387187958, "signal/frontier_coverage_5/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_5/group_std_mean": 0.20271311998367308, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002853430714458227, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002853430714458227, "signal/frontier_ece_reward/centered_abs_mean": 0.003463554894551635, "signal/frontier_ece_reward/group_bin_occupancy": 0.62421875, "signal/frontier_ece_reward/group_std_mean": 0.004399275593459606, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043294436181895435, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043294436181895435, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25717605352401735, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33379308581352235, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03214700669050217, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03214700669050217, "step": 240 }, { "calibration/aurc": 0.3350168970245671, "calibration/batch_distribution_entropy": 0.979507349012119, "calibration/batch_entropy_100bins": 0.9683820677767004, "calibration/batch_entropy_10bins": 0.979507349012119, "calibration/batch_entropy_50bins": 0.9770155344856404, "calibration/batch_uniqueness": 0.9531518665621839, "calibration/buffer_distribution_entropy": 0.9993121305283615, "calibration/buffer_entropy_100bins": 0.999273233968751, "calibration/buffer_entropy_10bins": 0.9993121305283615, "calibration/buffer_entropy_50bins": 0.9993773005255697, "calibration/confidence_entropy": 0.4675455419188176, "calibration/coverage@0%": 0.02188035102739726, "calibration/coverage@1%": 0.04570847602739726, "calibration/coverage@10%": 0.18681353962818004, "calibration/coverage@15%": 0.24155531433463798, "calibration/coverage@20%": 0.30644569471624267, "calibration/coverage@25%": 0.4002813111545988, "calibration/coverage@30%": 0.45930390777886493, "calibration/coverage@5%": 0.11211472602739728, "calibration/ece": 0.16025081061134686, "calibration/mean_confidence": 0.5276171519486046, "calibration/prompt_uniqueness": 0.8416884470928719, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 730.6, "completions/max_terminated_length": 561.8, "completions/mean_length": 217.26181640625, "completions/mean_terminated_length": 217.13308410644532, "completions/min_length": 111.6, "completions/min_terminated_length": 111.6, "epoch": 0.784, "grad_norm": 0.0007616052753292024, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 836274889.0, "reward": 0.8599894285202027, "reward_std": 0.08833477348089218, "rewards/accuracy_reward": 0.55615234375, "rewards/brier_reward": 0.7853159546852112, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0031998661812394857, "rewards/frontier_coverage_1": 0.08971645757555961, "rewards/frontier_coverage_10": 0.08970820307731628, "rewards/frontier_coverage_15": 0.08065339028835297, "rewards/frontier_coverage_20": 0.06030083037912846, "rewards/frontier_coverage_25": 0.04737272821366787, "rewards/frontier_coverage_5": 0.08971645757555961, "rewards/frontier_ece_reward": 0.0020965512841939924, "rewards/frontier_entropy_batch_reward": -0.19676691591739653, "signal/accuracy_reward/centered_abs_mean": 0.091058349609375, "signal/accuracy_reward/group_bin_occupancy": 0.169140625, "signal/accuracy_reward/group_std_mean": 0.12126990556716918, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455291748046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0455291748046875, "signal/advantage_abs_mean": 0.06926657930016518, "signal/advantage_pre_scale_abs_mean": 0.06926657930016518, "signal/advantage_pre_scale_std": 0.10528580248355865, "signal/advantage_std": 0.10528580248355865, "signal/brier_reward/centered_abs_mean": 0.11832668632268906, "signal/brier_reward/group_bin_occupancy": 0.83984375, "signal/brier_reward/group_std_mean": 0.15334346294403076, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014790835790336132, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014790835790336132, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002936989581212401, "signal/frontier_aurc_reward/group_bin_occupancy": 0.726171875, "signal/frontier_aurc_reward/group_std_mean": 0.004758535791188479, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.257211159914732e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.257211159914732e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15365730226039886, "signal/frontier_coverage_1/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_1/group_std_mean": 0.19880372881889344, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002750465599820018, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002750465599820018, "signal/frontier_coverage_10/centered_abs_mean": 0.15364324450492858, "signal/frontier_coverage_10/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_10/group_std_mean": 0.19878601729869844, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002750213909894228, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002750213909894228, "signal/frontier_coverage_15/centered_abs_mean": 0.1363199084997177, "signal/frontier_coverage_15/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_15/group_std_mean": 0.1768506795167923, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024401261936873196, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024401261936873196, "signal/frontier_coverage_20/centered_abs_mean": 0.08445133566856385, "signal/frontier_coverage_20/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_20/group_std_mean": 0.11020771414041519, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001511678844690323, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001511678844690323, "signal/frontier_coverage_25/centered_abs_mean": 0.05710015743970871, "signal/frontier_coverage_25/group_bin_occupancy": 0.927734375, "signal/frontier_coverage_25/group_std_mean": 0.07321809381246566, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001022092835046351, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001022092835046351, "signal/frontier_coverage_5/centered_abs_mean": 0.15365730226039886, "signal/frontier_coverage_5/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_5/group_std_mean": 0.19880372881889344, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002750465599820018, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002750465599820018, "signal/frontier_ece_reward/centered_abs_mean": 0.003645011968910694, "signal/frontier_ece_reward/group_bin_occupancy": 0.624609375, "signal/frontier_ece_reward/group_std_mean": 0.004628013540059328, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045562649611383674, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045562649611383674, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2689265012741089, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34439175128936766, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03361581265926361, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03361581265926361, "step": 245 }, { "calibration/aurc": 0.2089238888139648, "calibration/batch_distribution_entropy": 0.9783443187041841, "calibration/batch_entropy_100bins": 0.9692957828700483, "calibration/batch_entropy_10bins": 0.9783443187041841, "calibration/batch_entropy_50bins": 0.9749560863340105, "calibration/batch_uniqueness": 0.9527830701487154, "calibration/buffer_distribution_entropy": 0.9992285993286943, "calibration/buffer_entropy_100bins": 0.9992151365527878, "calibration/buffer_entropy_10bins": 0.9992285993286943, "calibration/buffer_entropy_50bins": 0.9992907312602902, "calibration/confidence_entropy": 0.4936001315000181, "calibration/coverage@0%": 0.04140854329745597, "calibration/coverage@1%": 0.04140854329745597, "calibration/coverage@10%": 0.3380305161448141, "calibration/coverage@15%": 0.48029293052837574, "calibration/coverage@20%": 0.5584530944227006, "calibration/coverage@25%": 0.6248937438845401, "calibration/coverage@30%": 0.7159460616438356, "calibration/coverage@5%": 0.11017153864970645, "calibration/ece": 0.11535332939116949, "calibration/mean_confidence": 0.48316137568507056, "calibration/prompt_uniqueness": 0.8392774758552288, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1050.4, "completions/max_terminated_length": 630.8, "completions/mean_length": 223.39736328125, "completions/mean_terminated_length": 223.14104309082032, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.8, "grad_norm": 0.0011625770712271333, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 853573038.0, "reward": 0.8727638602256775, "reward_std": 0.08851251155138015, "rewards/accuracy_reward": 0.57861328125, "rewards/brier_reward": 0.8088589787483216, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002810212690383196, "rewards/frontier_coverage_1": 0.09647311270236969, "rewards/frontier_coverage_10": 0.09640982151031494, "rewards/frontier_coverage_15": 0.08719095587730408, "rewards/frontier_coverage_20": 0.06359865590929985, "rewards/frontier_coverage_25": 0.049777823686599734, "rewards/frontier_coverage_5": 0.09647311270236969, "rewards/frontier_ece_reward": 0.00226962142623961, "rewards/frontier_entropy_batch_reward": -0.21244405210018158, "signal/accuracy_reward/centered_abs_mean": 0.089971923828125, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.11883124858140945, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0449859619140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0449859619140625, "signal/advantage_abs_mean": 0.06932459995150567, "signal/advantage_pre_scale_abs_mean": 0.06932459995150567, "signal/advantage_pre_scale_std": 0.10704608410596847, "signal/advantage_std": 0.10704608410596847, "signal/brier_reward/centered_abs_mean": 0.10898203402757645, "signal/brier_reward/group_bin_occupancy": 0.8546875, "signal/brier_reward/group_std_mean": 0.13880451619625092, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013622754253447056, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013622754253447056, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026166523108258843, "signal/frontier_aurc_reward/group_bin_occupancy": 0.719140625, "signal/frontier_aurc_reward/group_std_mean": 0.0043006549589335915, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.683807346737012e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.683807346737012e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14726514369249344, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.18634527921676636, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026360460091382266, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026360460091382266, "signal/frontier_coverage_10/centered_abs_mean": 0.14715069383382798, "signal/frontier_coverage_10/group_bin_occupancy": 0.875, "signal/frontier_coverage_10/group_std_mean": 0.18619897961616516, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002633997332304716, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002633997332304716, "signal/frontier_coverage_15/centered_abs_mean": 0.12722482085227965, "signal/frontier_coverage_15/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_15/group_std_mean": 0.1613352745771408, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002277324162423611, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002277324162423611, "signal/frontier_coverage_20/centered_abs_mean": 0.07807688787579536, "signal/frontier_coverage_20/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_20/group_std_mean": 0.10010033547878265, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013975762762129308, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013975762762129308, "signal/frontier_coverage_25/centered_abs_mean": 0.05350769758224487, "signal/frontier_coverage_25/group_bin_occupancy": 0.937109375, "signal/frontier_coverage_25/group_std_mean": 0.06782967150211335, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009577877586707473, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009577877586707473, "signal/frontier_coverage_5/centered_abs_mean": 0.14726514369249344, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.18634527921676636, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026360460091382266, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026360460091382266, "signal/frontier_ece_reward/centered_abs_mean": 0.0036554763093590735, "signal/frontier_ece_reward/group_bin_occupancy": 0.608203125, "signal/frontier_ece_reward/group_std_mean": 0.004595436621457338, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004569345386698842, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004569345386698842, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27770012617111206, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.727734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35029610991477966, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03471251577138901, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03471251577138901, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.44251735983763185, "eval_calibration/batch_distribution_entropy": 0.9223826154717725, "eval_calibration/batch_entropy_100bins": 0.7052278361140918, "eval_calibration/batch_entropy_10bins": 0.9223826154717725, "eval_calibration/batch_entropy_50bins": 0.7855204844461221, "eval_calibration/batch_uniqueness": 0.8955078125, "eval_calibration/buffer_distribution_entropy": 0.9993625308716999, "eval_calibration/buffer_entropy_100bins": 0.9992947637811891, "eval_calibration/buffer_entropy_10bins": 0.9993625308716999, "eval_calibration/buffer_entropy_50bins": 0.9993754687374323, "eval_calibration/confidence_entropy": 0.47403639371622897, "eval_calibration/coverage@0%": 0.0546875, "eval_calibration/coverage@1%": 0.0546875, "eval_calibration/coverage@10%": 0.0546875, "eval_calibration/coverage@15%": 0.0703125, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.203125, "eval_calibration/coverage@30%": 0.28125, "eval_calibration/coverage@5%": 0.0546875, "eval_calibration/ece": 0.22032250116355573, "eval_calibration/mean_confidence": 0.448963486796308, "eval_calibration/prompt_uniqueness": 0.8955078125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 445.0, "eval_completions/max_terminated_length": 445.0, "eval_completions/mean_length": 228.59785842895508, "eval_completions/mean_terminated_length": 228.59785842895508, "eval_completions/min_length": 137.5, "eval_completions/min_terminated_length": 137.5, "eval_loss": 0.0, "eval_num_tokens": 853573038.0, "eval_reward": 0.7110898196697235, "eval_reward_std": 0.22806879505515099, "eval_rewards/accuracy_reward": 0.4375, "eval_rewards/brier_reward": 0.8064036816358566, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0034677567309699953, "eval_rewards/frontier_coverage_1": 0.19150707125663757, "eval_rewards/frontier_coverage_10": 0.19115351140499115, "eval_rewards/frontier_coverage_15": 0.1673499085009098, "eval_rewards/frontier_coverage_20": 0.1066279262304306, "eval_rewards/frontier_coverage_25": 0.0597064346075058, "eval_rewards/frontier_coverage_5": 0.19150707125663757, "eval_rewards/frontier_ece_reward": 0.002807055599987507, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.2722, "eval_samples_per_second": 22.45, "eval_signal/accuracy_reward/centered_abs_mean": 0.476806640625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49588072299957275, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2384033203125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2384033203125, "eval_signal/advantage_abs_mean": 0.21377598494291306, "eval_signal/advantage_pre_scale_abs_mean": 0.21377598494291306, "eval_signal/advantage_pre_scale_std": 0.225502610206604, "eval_signal/advantage_std": 0.225502610206604, "eval_signal/brier_reward/centered_abs_mean": 0.1754986234009266, "eval_signal/brier_reward/group_bin_occupancy": 0.8359375, "eval_signal/brier_reward/group_std_mean": 0.2285812497138977, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021937327925115824, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021937327925115824, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004540800233371556, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6484375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.00849473278503865, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.128032095555682e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.128032095555682e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.35326529294252396, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.42462950199842453, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006323448498733342, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006323448498733342, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.35248684138059616, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.42372994869947433, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006309514516033232, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006309514516033232, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.29927831143140793, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_15/group_std_mean": 0.3624297082424164, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005357081652618945, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005357081652618945, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.172193493694067, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.921875, "eval_signal/frontier_coverage_20/group_std_mean": 0.21549956128001213, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030822635162621737, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030822635162621737, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08839073590934277, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_25/group_std_mean": 0.11184324324131012, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015821942070033401, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015821942070033401, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.35326529294252396, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.42462950199842453, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006323448498733342, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006323448498733342, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0050687192706391215, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.890625, "eval_signal/frontier_ece_reward/group_std_mean": 0.006167635438032448, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006335899088298902, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006335899088298902, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.18, "step": 250 }, { "calibration/aurc": 0.2352606782149013, "calibration/batch_distribution_entropy": 0.9709767853004306, "calibration/batch_entropy_100bins": 0.9633920257815575, "calibration/batch_entropy_10bins": 0.9709767853004306, "calibration/batch_entropy_50bins": 0.9695731037391886, "calibration/batch_uniqueness": 0.9511383056640625, "calibration/buffer_distribution_entropy": 0.9993028558668, "calibration/buffer_entropy_100bins": 0.999259175252465, "calibration/buffer_entropy_10bins": 0.9993028558668, "calibration/buffer_entropy_50bins": 0.9993303494209318, "calibration/confidence_entropy": 0.4811342741701587, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.124609375, "calibration/coverage@15%": 0.19921875, "calibration/coverage@20%": 0.509765625, "calibration/coverage@25%": 0.679296875, "calibration/coverage@30%": 0.790234375, "calibration/coverage@5%": 0.06015625, "calibration/ece": 0.14137887296716461, "calibration/mean_confidence": 0.5129679753761538, "calibration/prompt_uniqueness": 0.846533203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 664.6, "completions/max_terminated_length": 664.6, "completions/mean_length": 223.664453125, "completions/mean_terminated_length": 223.664453125, "completions/min_length": 110.6, "completions/min_terminated_length": 110.6, "epoch": 0.816, "grad_norm": 0.0016217977972701192, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 870962530.0, "reward": 0.8748025059700012, "reward_std": 0.09103592932224273, "rewards/accuracy_reward": 0.586328125, "rewards/brier_reward": 0.7861274123191834, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002859151270240545, "rewards/frontier_coverage_1": 0.06481336802244186, "rewards/frontier_coverage_10": 0.06481285095214843, "rewards/frontier_coverage_15": 0.06343448236584663, "rewards/frontier_coverage_20": 0.049446874484419825, "rewards/frontier_coverage_25": 0.04227612838149071, "rewards/frontier_coverage_5": 0.06481328457593918, "rewards/frontier_ece_reward": 0.001674002129584551, "rewards/frontier_entropy_batch_reward": -0.1839560568332672, "signal/accuracy_reward/centered_abs_mean": 0.1002685546875, "signal/accuracy_reward/group_bin_occupancy": 0.170703125, "signal/accuracy_reward/group_std_mean": 0.13107529729604722, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05013427734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05013427734375, "signal/advantage_abs_mean": 0.07067288607358932, "signal/advantage_pre_scale_abs_mean": 0.07067288607358932, "signal/advantage_pre_scale_std": 0.10996713936328888, "signal/advantage_std": 0.10996713936328888, "signal/brier_reward/centered_abs_mean": 0.12410824000835419, "signal/brier_reward/group_bin_occupancy": 0.863671875, "signal/brier_reward/group_std_mean": 0.1572820007801056, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015513530001044273, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015513530001044273, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027249534614384174, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73359375, "signal/frontier_aurc_reward/group_std_mean": 0.004633441660553217, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.877666797256097e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.877666797256097e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16365497708320617, "signal/frontier_coverage_1/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_1/group_std_mean": 0.20742543637752534, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029294240288436414, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029294240288436414, "signal/frontier_coverage_10/centered_abs_mean": 0.16364520490169526, "signal/frontier_coverage_10/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_10/group_std_mean": 0.2074132442474365, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002929249033331871, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002929249033331871, "signal/frontier_coverage_15/centered_abs_mean": 0.14128550589084626, "signal/frontier_coverage_15/group_bin_occupancy": 0.86875, "signal/frontier_coverage_15/group_std_mean": 0.1796106904745102, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002529010409489274, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002529010409489274, "signal/frontier_coverage_20/centered_abs_mean": 0.085682213306427, "signal/frontier_coverage_20/group_bin_occupancy": 0.8875, "signal/frontier_coverage_20/group_std_mean": 0.11009515970945358, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015337116550654174, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015337116550654174, "signal/frontier_coverage_25/centered_abs_mean": 0.057272438704967496, "signal/frontier_coverage_25/group_bin_occupancy": 0.927734375, "signal/frontier_coverage_25/group_std_mean": 0.07305631190538406, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001025176583789289, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001025176583789289, "signal/frontier_coverage_5/centered_abs_mean": 0.16365209817886353, "signal/frontier_coverage_5/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_5/group_std_mean": 0.2074216663837433, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029293723870068788, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029293723870068788, "signal/frontier_ece_reward/centered_abs_mean": 0.0036711919121444224, "signal/frontier_ece_reward/group_bin_occupancy": 0.631640625, "signal/frontier_ece_reward/group_std_mean": 0.004636763595044613, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004588989890180528, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004588989890180528, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2510399729013443, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.731640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32446773648262023, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031379996612668035, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031379996612668035, "step": 255 }, { "calibration/aurc": 0.2889344001133566, "calibration/batch_distribution_entropy": 0.9739075456970238, "calibration/batch_entropy_100bins": 0.9649792933196213, "calibration/batch_entropy_10bins": 0.9739075456970238, "calibration/batch_entropy_50bins": 0.9740646917655301, "calibration/batch_uniqueness": 0.9515716552734375, "calibration/buffer_distribution_entropy": 0.999235170401582, "calibration/buffer_entropy_100bins": 0.9992304590039129, "calibration/buffer_entropy_10bins": 0.999235170401582, "calibration/buffer_entropy_50bins": 0.9992710439676749, "calibration/confidence_entropy": 0.48948251997062514, "calibration/coverage@0%": 0.0328125, "calibration/coverage@1%": 0.0328125, "calibration/coverage@10%": 0.243359375, "calibration/coverage@15%": 0.329296875, "calibration/coverage@20%": 0.369921875, "calibration/coverage@25%": 0.431640625, "calibration/coverage@30%": 0.516796875, "calibration/coverage@5%": 0.16796875, "calibration/ece": 0.11959374788247965, "calibration/mean_confidence": 0.4745517848151627, "calibration/prompt_uniqueness": 0.841357421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.2, "completions/max_terminated_length": 473.2, "completions/mean_length": 232.755078125, "completions/mean_terminated_length": 232.755078125, "completions/min_length": 112.2, "completions/min_terminated_length": 112.2, "epoch": 0.832, "grad_norm": 0.0008559515117667615, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 888354294.0, "reward": 0.8606176733970642, "reward_std": 0.08793712109327316, "rewards/accuracy_reward": 0.54833984375, "rewards/brier_reward": 0.8110590815544129, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0024320174707099795, "rewards/frontier_coverage_1": 0.11919757276773453, "rewards/frontier_coverage_10": 0.11918442994356156, "rewards/frontier_coverage_15": 0.10204497873783111, "rewards/frontier_coverage_20": 0.07099157050251961, "rewards/frontier_coverage_25": 0.05516631901264191, "rewards/frontier_coverage_5": 0.11918965280056, "rewards/frontier_ece_reward": 0.00244655329734087, "rewards/frontier_entropy_batch_reward": -0.20545812249183654, "signal/accuracy_reward/centered_abs_mean": 0.091961669921875, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.1235174298286438, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459808349609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0459808349609375, "signal/advantage_abs_mean": 0.06834444552659988, "signal/advantage_pre_scale_abs_mean": 0.06834444552659988, "signal/advantage_pre_scale_std": 0.10633230209350586, "signal/advantage_std": 0.10633230209350586, "signal/brier_reward/centered_abs_mean": 0.10559364557266235, "signal/brier_reward/group_bin_occupancy": 0.8421875, "signal/brier_reward/group_std_mean": 0.13621910512447358, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013199205696582793, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013199205696582793, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00220845362637192, "signal/frontier_aurc_reward/group_bin_occupancy": 0.730078125, "signal/frontier_aurc_reward/group_std_mean": 0.00363809815607965, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.953131890739314e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.953131890739314e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1493607133626938, "signal/frontier_coverage_1/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_1/group_std_mean": 0.1920451521873474, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026735567953437567, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026735567953437567, "signal/frontier_coverage_10/centered_abs_mean": 0.1493442475795746, "signal/frontier_coverage_10/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_10/group_std_mean": 0.1920243412256241, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026732619386166333, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026732619386166333, "signal/frontier_coverage_15/centered_abs_mean": 0.12451920211315155, "signal/frontier_coverage_15/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_15/group_std_mean": 0.16057583391666413, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002228893619030714, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002228893619030714, "signal/frontier_coverage_20/centered_abs_mean": 0.07849853485822678, "signal/frontier_coverage_20/group_bin_occupancy": 0.89375, "signal/frontier_coverage_20/group_std_mean": 0.10143940895795822, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014051236677914858, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014051236677914858, "signal/frontier_coverage_25/centered_abs_mean": 0.05491392761468887, "signal/frontier_coverage_25/group_bin_occupancy": 0.93203125, "signal/frontier_coverage_25/group_std_mean": 0.06983330100774765, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009829592425376176, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009829592425376176, "signal/frontier_coverage_5/centered_abs_mean": 0.14935098588466644, "signal/frontier_coverage_5/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_5/group_std_mean": 0.19203279614448548, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026733824983239175, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026733824983239175, "signal/frontier_ece_reward/centered_abs_mean": 0.003441282361745834, "signal/frontier_ece_reward/group_bin_occupancy": 0.59765625, "signal/frontier_ece_reward/group_std_mean": 0.004348612949252128, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043016029521822927, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043016029521822927, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2689357101917267, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726953125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3395772337913513, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033616963773965836, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033616963773965836, "step": 260 }, { "calibration/aurc": 0.3178738194955885, "calibration/batch_distribution_entropy": 0.9734277851333974, "calibration/batch_entropy_100bins": 0.9655052615628407, "calibration/batch_entropy_10bins": 0.9734277851333974, "calibration/batch_entropy_50bins": 0.9738673939128841, "calibration/batch_uniqueness": 0.9522003173828125, "calibration/buffer_distribution_entropy": 0.9992495195467482, "calibration/buffer_entropy_100bins": 0.9992220261544034, "calibration/buffer_entropy_10bins": 0.9992495195467482, "calibration/buffer_entropy_50bins": 0.9992710813705669, "calibration/confidence_entropy": 0.4941748172082874, "calibration/coverage@0%": 0.021875, "calibration/coverage@1%": 0.021875, "calibration/coverage@10%": 0.169140625, "calibration/coverage@15%": 0.223828125, "calibration/coverage@20%": 0.397265625, "calibration/coverage@25%": 0.471875, "calibration/coverage@30%": 0.56328125, "calibration/coverage@5%": 0.075390625, "calibration/ece": 0.13729417046243933, "calibration/mean_confidence": 0.5503775516014088, "calibration/prompt_uniqueness": 0.85205078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 760.8, "completions/max_terminated_length": 552.2, "completions/mean_length": 240.17587890625, "completions/mean_terminated_length": 240.0502502441406, "completions/min_length": 121.4, "completions/min_terminated_length": 121.4, "epoch": 0.848, "grad_norm": 0.0008649929077364504, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 905828063.0, "reward": 0.8515474200248718, "reward_std": 0.08773799389600753, "rewards/accuracy_reward": 0.53330078125, "rewards/brier_reward": 0.8009130716323852, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002978764148429036, "rewards/frontier_coverage_1": 0.11573868244886398, "rewards/frontier_coverage_10": 0.11573390364646911, "rewards/frontier_coverage_15": 0.09874581471085549, "rewards/frontier_coverage_20": 0.06614762619137764, "rewards/frontier_coverage_25": 0.04877230152487755, "rewards/frontier_coverage_5": 0.11573788076639176, "rewards/frontier_ece_reward": 0.0023759857984259726, "rewards/frontier_entropy_batch_reward": -0.20361319780349732, "signal/accuracy_reward/centered_abs_mean": 0.084564208984375, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.1135980024933815, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422821044921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0422821044921875, "signal/advantage_abs_mean": 0.06841744184494018, "signal/advantage_pre_scale_abs_mean": 0.06841744184494018, "signal/advantage_pre_scale_std": 0.10599033236503601, "signal/advantage_std": 0.10599033236503601, "signal/brier_reward/centered_abs_mean": 0.11178396046161651, "signal/brier_reward/group_bin_occupancy": 0.857421875, "signal/brier_reward/group_std_mean": 0.14463868141174316, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013972995057702064, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013972995057702064, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027990068774670362, "signal/frontier_aurc_reward/group_bin_occupancy": 0.719140625, "signal/frontier_aurc_reward/group_std_mean": 0.00469839870929718, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.010222375858575e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.010222375858575e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14509033262729645, "signal/frontier_coverage_1/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_1/group_std_mean": 0.18849168419837953, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025971168652176857, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025971168652176857, "signal/frontier_coverage_10/centered_abs_mean": 0.14508127570152282, "signal/frontier_coverage_10/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_10/group_std_mean": 0.188480207324028, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025969548150897026, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025969548150897026, "signal/frontier_coverage_15/centered_abs_mean": 0.12304246425628662, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.16019290089607238, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002202460076659918, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002202460076659918, "signal/frontier_coverage_20/centered_abs_mean": 0.07703937292098999, "signal/frontier_coverage_20/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_20/group_std_mean": 0.10073128789663315, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013790046563372017, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013790046563372017, "signal/frontier_coverage_25/centered_abs_mean": 0.05316209346055985, "signal/frontier_coverage_25/group_bin_occupancy": 0.931640625, "signal/frontier_coverage_25/group_std_mean": 0.06862208545207978, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009516014717519284, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009516014717519284, "signal/frontier_coverage_5/centered_abs_mean": 0.14508905708789827, "signal/frontier_coverage_5/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_5/group_std_mean": 0.18849003911018372, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002597094140946865, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002597094140946865, "signal/frontier_ece_reward/centered_abs_mean": 0.003437171783298254, "signal/frontier_ece_reward/group_bin_occupancy": 0.625390625, "signal/frontier_ece_reward/group_std_mean": 0.004384188260883093, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042964647291228176, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042964647291228176, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27023649513721465, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3401765406131744, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03377956189215183, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03377956189215183, "step": 265 }, { "calibration/aurc": 0.2825008543621849, "calibration/batch_distribution_entropy": 0.9662087974973617, "calibration/batch_entropy_100bins": 0.96186889855781, "calibration/batch_entropy_10bins": 0.9662087974973617, "calibration/batch_entropy_50bins": 0.9688150730230692, "calibration/batch_uniqueness": 0.9511749267578125, "calibration/buffer_distribution_entropy": 0.9992175012534602, "calibration/buffer_entropy_100bins": 0.9991897788432713, "calibration/buffer_entropy_10bins": 0.9992175012534602, "calibration/buffer_entropy_50bins": 0.9992416855129468, "calibration/confidence_entropy": 0.49441546229639216, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.0859375, "calibration/coverage@15%": 0.175, "calibration/coverage@20%": 0.253125, "calibration/coverage@25%": 0.408984375, "calibration/coverage@30%": 0.52109375, "calibration/coverage@5%": 0.020703125, "calibration/ece": 0.13110279802041727, "calibration/mean_confidence": 0.5927889569782829, "calibration/prompt_uniqueness": 0.85537109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 952.0, "completions/max_terminated_length": 568.8, "completions/mean_length": 247.87822265625, "completions/mean_terminated_length": 247.37486267089844, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.864, "grad_norm": 0.001017643604427576, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 923353152.0, "reward": 0.8750402927398682, "reward_std": 0.09378763735294342, "rewards/accuracy_reward": 0.59384765625, "rewards/brier_reward": 0.7912804245948791, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0026999496389180423, "rewards/frontier_coverage_1": 0.07068156786262988, "rewards/frontier_coverage_10": 0.07068105041980743, "rewards/frontier_coverage_15": 0.0655278891324997, "rewards/frontier_coverage_20": 0.05093179382383824, "rewards/frontier_coverage_25": 0.04501226842403412, "rewards/frontier_coverage_5": 0.07068156786262988, "rewards/frontier_ece_reward": 0.0017709420528262854, "rewards/frontier_entropy_batch_reward": -0.21965786516666413, "signal/accuracy_reward/centered_abs_mean": 0.095587158203125, "signal/accuracy_reward/group_bin_occupancy": 0.171484375, "signal/accuracy_reward/group_std_mean": 0.1286234974861145, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477935791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0477935791015625, "signal/advantage_abs_mean": 0.07281743288040161, "signal/advantage_pre_scale_abs_mean": 0.07281743288040161, "signal/advantage_pre_scale_std": 0.11024300009012222, "signal/advantage_std": 0.11024300009012222, "signal/brier_reward/centered_abs_mean": 0.11920353770256042, "signal/brier_reward/group_bin_occupancy": 0.844921875, "signal/brier_reward/group_std_mean": 0.15356789529323578, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014900442212820053, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014900442212820053, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026988452998921277, "signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125, "signal/frontier_aurc_reward/group_std_mean": 0.004450180754065514, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8309330304618924e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8309330304618924e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15822483897209166, "signal/frontier_coverage_1/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_1/group_std_mean": 0.20437292754650116, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028322245460003616, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028322245460003616, "signal/frontier_coverage_10/centered_abs_mean": 0.15821611285209655, "signal/frontier_coverage_10/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_10/group_std_mean": 0.2043617308139801, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002832068270072341, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002832068270072341, "signal/frontier_coverage_15/centered_abs_mean": 0.13421038091182708, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.17385528981685638, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002402365766465664, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002402365766465664, "signal/frontier_coverage_20/centered_abs_mean": 0.08285623341798783, "signal/frontier_coverage_20/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_20/group_std_mean": 0.10784974545240403, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014831265201792122, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014831265201792122, "signal/frontier_coverage_25/centered_abs_mean": 0.05798155665397644, "signal/frontier_coverage_25/group_bin_occupancy": 0.923828125, "signal/frontier_coverage_25/group_std_mean": 0.07409553527832032, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010378698818385601, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010378698818385601, "signal/frontier_coverage_5/centered_abs_mean": 0.15822483897209166, "signal/frontier_coverage_5/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_5/group_std_mean": 0.20437292754650116, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028322245460003616, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028322245460003616, "signal/frontier_ece_reward/centered_abs_mean": 0.003631744394078851, "signal/frontier_ece_reward/group_bin_occupancy": 0.634375, "signal/frontier_ece_reward/group_std_mean": 0.004583617858588696, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045396804925985635, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045396804925985635, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28241761326789855, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3559215545654297, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03530220165848732, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03530220165848732, "step": 270 }, { "calibration/aurc": 0.3989955123694968, "calibration/batch_distribution_entropy": 0.983939721229347, "calibration/batch_entropy_100bins": 0.9701765594162302, "calibration/batch_entropy_10bins": 0.983939721229347, "calibration/batch_entropy_50bins": 0.9790045138186654, "calibration/batch_uniqueness": 0.953621613094267, "calibration/buffer_distribution_entropy": 0.9991278370535038, "calibration/buffer_entropy_100bins": 0.9991747826000577, "calibration/buffer_entropy_10bins": 0.9991278370535038, "calibration/buffer_entropy_50bins": 0.9992235492506977, "calibration/confidence_entropy": 0.4716516326426182, "calibration/coverage@0%": 0.005865490459882583, "calibration/coverage@1%": 0.005865490459882583, "calibration/coverage@10%": 0.008605216487279844, "calibration/coverage@15%": 0.0304955051369863, "calibration/coverage@20%": 0.04614420254403131, "calibration/coverage@25%": 0.1395272749510763, "calibration/coverage@30%": 0.2842060604207436, "calibration/coverage@5%": 0.005865490459882583, "calibration/ece": 0.12940730029672529, "calibration/mean_confidence": 0.49376591700142836, "calibration/prompt_uniqueness": 0.8365943170362904, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1032.8, "completions/max_terminated_length": 679.8, "completions/mean_length": 250.778515625, "completions/mean_terminated_length": 250.52763977050782, "completions/min_length": 122.6, "completions/min_terminated_length": 122.6, "epoch": 0.88, "grad_norm": 0.0010307779302820563, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 941068196.0, "reward": 0.8426900625228881, "reward_std": 0.09440464824438095, "rewards/accuracy_reward": 0.51435546875, "rewards/brier_reward": 0.7943570613861084, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003383295517414808, "rewards/frontier_coverage_1": 0.13087365329265593, "rewards/frontier_coverage_10": 0.13065045028924943, "rewards/frontier_coverage_15": 0.11363004744052888, "rewards/frontier_coverage_20": 0.07452845722436904, "rewards/frontier_coverage_25": 0.05105185955762863, "rewards/frontier_coverage_5": 0.13087365329265593, "rewards/frontier_ece_reward": 0.002461729710921645, "rewards/frontier_entropy_batch_reward": -0.20190061628818512, "signal/accuracy_reward/centered_abs_mean": 0.103082275390625, "signal/accuracy_reward/group_bin_occupancy": 0.174609375, "signal/accuracy_reward/group_std_mean": 0.13669233918190002, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515411376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0515411376953125, "signal/advantage_abs_mean": 0.07417571395635605, "signal/advantage_pre_scale_abs_mean": 0.07417571395635605, "signal/advantage_pre_scale_std": 0.11395351439714432, "signal/advantage_std": 0.11395351439714432, "signal/brier_reward/centered_abs_mean": 0.12301892042160034, "signal/brier_reward/group_bin_occupancy": 0.8390625, "signal/brier_reward/group_std_mean": 0.15880888998508452, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015377365052700043, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015377365052700043, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033534748945385218, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69921875, "signal/frontier_aurc_reward/group_std_mean": 0.005596455931663513, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.002719528623857e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.002719528623857e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16604825258255004, "signal/frontier_coverage_1/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_1/group_std_mean": 0.21365970373153687, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00297226351685822, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00297226351685822, "signal/frontier_coverage_10/centered_abs_mean": 0.1656820148229599, "signal/frontier_coverage_10/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_10/group_std_mean": 0.21318538784980773, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029657080769538878, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029657080769538878, "signal/frontier_coverage_15/centered_abs_mean": 0.14318577647209169, "signal/frontier_coverage_15/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_15/group_std_mean": 0.1846143424510956, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025630252901464702, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025630252901464702, "signal/frontier_coverage_20/centered_abs_mean": 0.0882651075720787, "signal/frontier_coverage_20/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_20/group_std_mean": 0.11416510492563248, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001579945394769311, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001579945394769311, "signal/frontier_coverage_25/centered_abs_mean": 0.05941323563456535, "signal/frontier_coverage_25/group_bin_occupancy": 0.937109375, "signal/frontier_coverage_25/group_std_mean": 0.075755076110363, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010634968522936106, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010634968522936106, "signal/frontier_coverage_5/centered_abs_mean": 0.16604825258255004, "signal/frontier_coverage_5/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_5/group_std_mean": 0.21365970373153687, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00297226351685822, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00297226351685822, "signal/frontier_ece_reward/centered_abs_mean": 0.0036736581940203907, "signal/frontier_ece_reward/group_bin_occupancy": 0.623828125, "signal/frontier_ece_reward/group_std_mean": 0.004686945211142301, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045920727425254884, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045920727425254884, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26177450716495515, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33326379060745237, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032721813395619394, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032721813395619394, "step": 275 }, { "calibration/aurc": 0.34579738475043287, "calibration/batch_distribution_entropy": 0.9820375851432381, "calibration/batch_entropy_100bins": 0.9662901559276893, "calibration/batch_entropy_10bins": 0.9820375851432381, "calibration/batch_entropy_50bins": 0.9762958973616612, "calibration/batch_uniqueness": 0.9531724427664274, "calibration/buffer_distribution_entropy": 0.9989790305500732, "calibration/buffer_entropy_100bins": 0.9990994457185127, "calibration/buffer_entropy_10bins": 0.9989790305500732, "calibration/buffer_entropy_50bins": 0.9991439357595896, "calibration/confidence_entropy": 0.49993937105601816, "calibration/coverage@0%": 0.02343979329745597, "calibration/coverage@1%": 0.02343979329745597, "calibration/coverage@10%": 0.05156479329745597, "calibration/coverage@15%": 0.138674168297456, "calibration/coverage@20%": 0.2552195450097847, "calibration/coverage@25%": 0.3666271709882583, "calibration/coverage@30%": 0.4494916523972603, "calibration/coverage@5%": 0.04062729329745597, "calibration/ece": 0.1285110208907984, "calibration/mean_confidence": 0.5151410765100136, "calibration/prompt_uniqueness": 0.8424560800923517, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 760.6, "completions/max_terminated_length": 579.8, "completions/mean_length": 246.29541015625, "completions/mean_terminated_length": 246.169140625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.896, "grad_norm": 0.000937454926315695, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 958701109.0, "reward": 0.8541659832000732, "reward_std": 0.08857241421937942, "rewards/accuracy_reward": 0.5396484375, "rewards/brier_reward": 0.7998676776885987, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002759965811856091, "rewards/frontier_coverage_1": 0.11034291237592697, "rewards/frontier_coverage_10": 0.11029749512672424, "rewards/frontier_coverage_15": 0.10030711442232132, "rewards/frontier_coverage_20": 0.06749739050865174, "rewards/frontier_coverage_25": 0.047641870379447934, "rewards/frontier_coverage_5": 0.11033990383148193, "rewards/frontier_ece_reward": 0.0017286977032199501, "rewards/frontier_entropy_batch_reward": -0.20393397510051728, "signal/accuracy_reward/centered_abs_mean": 0.0900390625, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12443099468946457, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04501953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04501953125, "signal/advantage_abs_mean": 0.06760159730911255, "signal/advantage_pre_scale_abs_mean": 0.06760159730911255, "signal/advantage_pre_scale_std": 0.10470890700817108, "signal/advantage_std": 0.10470890700817108, "signal/brier_reward/centered_abs_mean": 0.11129257977008819, "signal/brier_reward/group_bin_occupancy": 0.850390625, "signal/brier_reward/group_std_mean": 0.14382209181785582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013911572471261024, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013911572471261024, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023095492739230393, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73359375, "signal/frontier_aurc_reward/group_std_mean": 0.0037586647551506756, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.134093142056372e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.134093142056372e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1561950832605362, "signal/frontier_coverage_1/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_1/group_std_mean": 0.20052540600299834, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027958919294178487, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027958919294178487, "signal/frontier_coverage_10/centered_abs_mean": 0.15565426647663116, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.199809730052948, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002786211296916008, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002786211296916008, "signal/frontier_coverage_15/centered_abs_mean": 0.13624198436737062, "signal/frontier_coverage_15/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_15/group_std_mean": 0.1747460901737213, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024387314915657043, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024387314915657043, "signal/frontier_coverage_20/centered_abs_mean": 0.0824216440320015, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.1063196137547493, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014753472525626421, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014753472525626421, "signal/frontier_coverage_25/centered_abs_mean": 0.053366570919752124, "signal/frontier_coverage_25/group_bin_occupancy": 0.906640625, "signal/frontier_coverage_25/group_std_mean": 0.06851721107959748, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009552616043947637, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009552616043947637, "signal/frontier_coverage_5/centered_abs_mean": 0.1561916083097458, "signal/frontier_coverage_5/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_5/group_std_mean": 0.20052067935466766, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002795829763635993, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002795829763635993, "signal/frontier_ece_reward/centered_abs_mean": 0.0035572517197579147, "signal/frontier_ece_reward/group_bin_occupancy": 0.58828125, "signal/frontier_ece_reward/group_std_mean": 0.004552530776709318, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044465646496973934, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044465646496973934, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.276011261343956, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35015120506286623, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0345014076679945, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0345014076679945, "step": 280 }, { "calibration/aurc": 0.35540442785278614, "calibration/batch_distribution_entropy": 0.9746369661753949, "calibration/batch_entropy_100bins": 0.9632824538548672, "calibration/batch_entropy_10bins": 0.9746369661753949, "calibration/batch_entropy_50bins": 0.973053929289723, "calibration/batch_uniqueness": 0.951324462890625, "calibration/buffer_distribution_entropy": 0.9989514375360533, "calibration/buffer_entropy_100bins": 0.9990828334202781, "calibration/buffer_entropy_10bins": 0.9989514375360533, "calibration/buffer_entropy_50bins": 0.9991101689613113, "calibration/confidence_entropy": 0.5108973531224291, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.090234375, "calibration/coverage@15%": 0.190234375, "calibration/coverage@20%": 0.295703125, "calibration/coverage@25%": 0.3859375, "calibration/coverage@30%": 0.460546875, "calibration/coverage@5%": 0.03515625, "calibration/ece": 0.14220309508484869, "calibration/mean_confidence": 0.4849877995867112, "calibration/prompt_uniqueness": 0.84873046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 741.0, "completions/max_terminated_length": 551.2, "completions/mean_length": 241.6802734375, "completions/mean_terminated_length": 241.55431518554687, "completions/min_length": 119.8, "completions/min_terminated_length": 119.8, "epoch": 0.912, "grad_norm": 0.0008511711494065821, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 976227211.0, "reward": 0.8543228030204773, "reward_std": 0.08962784111499786, "rewards/accuracy_reward": 0.5455078125, "rewards/brier_reward": 0.7963875532150269, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0024389707017689944, "rewards/frontier_coverage_1": 0.09579984173178673, "rewards/frontier_coverage_10": 0.09567191377282143, "rewards/frontier_coverage_15": 0.0898715415969491, "rewards/frontier_coverage_20": 0.06119959354400635, "rewards/frontier_coverage_25": 0.04600660875439644, "rewards/frontier_coverage_5": 0.09579412266612053, "rewards/frontier_ece_reward": 0.0018233929062262178, "rewards/frontier_entropy_batch_reward": -0.2134964257478714, "signal/accuracy_reward/centered_abs_mean": 0.0875244140625, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12260164320468903, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04376220703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04376220703125, "signal/advantage_abs_mean": 0.06862077489495277, "signal/advantage_pre_scale_abs_mean": 0.06862077489495277, "signal/advantage_pre_scale_std": 0.10393733531236649, "signal/advantage_std": 0.10393733531236649, "signal/brier_reward/centered_abs_mean": 0.11736378222703933, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.15200705230236053, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014670472778379916, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014670472778379916, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021737504750490187, "signal/frontier_aurc_reward/group_bin_occupancy": 0.711328125, "signal/frontier_aurc_reward/group_std_mean": 0.003746302565559745, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.891013257089071e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.891013257089071e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16225638389587402, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.20896171331405639, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029043891932815312, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029043891932815312, "signal/frontier_coverage_10/centered_abs_mean": 0.16155781745910644, "signal/frontier_coverage_10/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_10/group_std_mean": 0.20806140899658204, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028918846510350704, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028918846510350704, "signal/frontier_coverage_15/centered_abs_mean": 0.14396750777959824, "signal/frontier_coverage_15/group_bin_occupancy": 0.871875, "signal/frontier_coverage_15/group_std_mean": 0.18532683253288268, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025770182721316813, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025770182721316813, "signal/frontier_coverage_20/centered_abs_mean": 0.0858291208744049, "signal/frontier_coverage_20/group_bin_occupancy": 0.9, "signal/frontier_coverage_20/group_std_mean": 0.11078901290893554, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015363412443548442, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015363412443548442, "signal/frontier_coverage_25/centered_abs_mean": 0.05511407479643822, "signal/frontier_coverage_25/group_bin_occupancy": 0.9265625, "signal/frontier_coverage_25/group_std_mean": 0.07155242562294006, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009865418775007128, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009865418775007128, "signal/frontier_coverage_5/centered_abs_mean": 0.16224364936351776, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.20894473493099214, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029041611589491366, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029041611589491366, "signal/frontier_ece_reward/centered_abs_mean": 0.003552433103322983, "signal/frontier_ece_reward/group_bin_occupancy": 0.576953125, "signal/frontier_ece_reward/group_std_mean": 0.004548510629683733, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044405413791537285, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044405413791537285, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2813218832015991, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3521720230579376, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03516523540019989, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03516523540019989, "step": 285 }, { "calibration/aurc": 0.3877844515793165, "calibration/batch_distribution_entropy": 0.9866501795107988, "calibration/batch_entropy_100bins": 0.9728511672753379, "calibration/batch_entropy_10bins": 0.9866501795107988, "calibration/batch_entropy_50bins": 0.9808825904754377, "calibration/batch_uniqueness": 0.954571533203125, "calibration/buffer_distribution_entropy": 0.9990166406847901, "calibration/buffer_entropy_100bins": 0.9991172063970971, "calibration/buffer_entropy_10bins": 0.9990166406847901, "calibration/buffer_entropy_50bins": 0.9991579855854283, "calibration/confidence_entropy": 0.49825895928661373, "calibration/coverage@0%": 0.006640625, "calibration/coverage@1%": 0.006640625, "calibration/coverage@10%": 0.0171875, "calibration/coverage@15%": 0.0203125, "calibration/coverage@20%": 0.083984375, "calibration/coverage@25%": 0.213671875, "calibration/coverage@30%": 0.295703125, "calibration/coverage@5%": 0.006640625, "calibration/ece": 0.1253656157246842, "calibration/mean_confidence": 0.5123772352862292, "calibration/prompt_uniqueness": 0.8548828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 680.2, "completions/max_terminated_length": 468.0, "completions/mean_length": 233.83251953125, "completions/mean_terminated_length": 233.70564880371094, "completions/min_length": 112.6, "completions/min_terminated_length": 112.6, "epoch": 0.928, "grad_norm": 0.0007553471950814128, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 993648472.0, "reward": 0.8464865446090698, "reward_std": 0.08689890056848526, "rewards/accuracy_reward": 0.52626953125, "rewards/brier_reward": 0.788770604133606, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0032333484385162594, "rewards/frontier_coverage_1": 0.11250228732824326, "rewards/frontier_coverage_10": 0.11221933662891388, "rewards/frontier_coverage_15": 0.1025825411081314, "rewards/frontier_coverage_20": 0.06942355185747147, "rewards/frontier_coverage_25": 0.053556407988071444, "rewards/frontier_coverage_5": 0.11250228732824326, "rewards/frontier_ece_reward": 0.0022580260410904884, "rewards/frontier_entropy_batch_reward": -0.2031704902648926, "signal/accuracy_reward/centered_abs_mean": 0.084185791015625, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.1131853774189949, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0420928955078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0420928955078125, "signal/advantage_abs_mean": 0.06731941103935242, "signal/advantage_pre_scale_abs_mean": 0.06731941103935242, "signal/advantage_pre_scale_std": 0.10365704894065857, "signal/advantage_std": 0.10365704894065857, "signal/brier_reward/centered_abs_mean": 0.1226132184267044, "signal/brier_reward/group_bin_occupancy": 0.843359375, "signal/brier_reward/group_std_mean": 0.15720722079277039, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01532665230333805, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01532665230333805, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003174196882173419, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875, "signal/frontier_aurc_reward/group_std_mean": 0.005680124741047621, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.681812253897078e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.681812253897078e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15731285214424134, "signal/frontier_coverage_1/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_1/group_std_mean": 0.20189858376979827, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028158999979496003, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028158999979496003, "signal/frontier_coverage_10/centered_abs_mean": 0.15667309165000914, "signal/frontier_coverage_10/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_10/group_std_mean": 0.2010861098766327, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002804448362439871, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002804448362439871, "signal/frontier_coverage_15/centered_abs_mean": 0.13898983597755432, "signal/frontier_coverage_15/group_bin_occupancy": 0.859375, "signal/frontier_coverage_15/group_std_mean": 0.1787546008825302, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024879179894924165, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024879179894924165, "signal/frontier_coverage_20/centered_abs_mean": 0.08620916604995728, "signal/frontier_coverage_20/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_20/group_std_mean": 0.11133407950401306, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015431440435349942, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015431440435349942, "signal/frontier_coverage_25/centered_abs_mean": 0.059717252105474475, "signal/frontier_coverage_25/group_bin_occupancy": 0.925, "signal/frontier_coverage_25/group_std_mean": 0.07652692198753357, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010689388029277325, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010689388029277325, "signal/frontier_coverage_5/centered_abs_mean": 0.15731285214424134, "signal/frontier_coverage_5/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_5/group_std_mean": 0.20189858376979827, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028158999979496003, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028158999979496003, "signal/frontier_ece_reward/centered_abs_mean": 0.0038206770084798338, "signal/frontier_ece_reward/group_bin_occupancy": 0.584765625, "signal/frontier_ece_reward/group_std_mean": 0.004873193427920341, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004775846260599792, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004775846260599792, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.266440337896347, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34233739972114563, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03330504223704338, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03330504223704338, "step": 290 }, { "calibration/aurc": 0.2447793406319588, "calibration/batch_distribution_entropy": 0.9874497094930328, "calibration/batch_entropy_100bins": 0.9716391593970762, "calibration/batch_entropy_10bins": 0.9874497094930328, "calibration/batch_entropy_50bins": 0.9810979705902847, "calibration/batch_uniqueness": 0.9539276123046875, "calibration/buffer_distribution_entropy": 0.9989772205264174, "calibration/buffer_entropy_100bins": 0.9990865812417503, "calibration/buffer_entropy_10bins": 0.9989772205264174, "calibration/buffer_entropy_50bins": 0.9991286661644855, "calibration/confidence_entropy": 0.5002220328331736, "calibration/coverage@0%": 0.03515625, "calibration/coverage@1%": 0.03515625, "calibration/coverage@10%": 0.261328125, "calibration/coverage@15%": 0.34765625, "calibration/coverage@20%": 0.43671875, "calibration/coverage@25%": 0.531640625, "calibration/coverage@30%": 0.608984375, "calibration/coverage@5%": 0.149609375, "calibration/ece": 0.09089448170709025, "calibration/mean_confidence": 0.4852771091860312, "calibration/prompt_uniqueness": 0.844384765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 690.6, "completions/max_terminated_length": 481.0, "completions/mean_length": 231.37978515625, "completions/mean_terminated_length": 231.25249328613282, "completions/min_length": 114.2, "completions/min_terminated_length": 114.2, "epoch": 0.944, "grad_norm": 0.0009843307780101895, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 1010993225.0, "reward": 0.8495798826217651, "reward_std": 0.0940755695104599, "rewards/accuracy_reward": 0.53564453125, "rewards/brier_reward": 0.789400064945221, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002888223179616034, "rewards/frontier_coverage_1": 0.10932025760412216, "rewards/frontier_coverage_10": 0.10875759422779083, "rewards/frontier_coverage_15": 0.10018027424812317, "rewards/frontier_coverage_20": 0.06886630058288574, "rewards/frontier_coverage_25": 0.049372269213199614, "rewards/frontier_coverage_5": 0.10932025760412216, "rewards/frontier_ece_reward": 0.0021821844391524793, "rewards/frontier_entropy_batch_reward": -0.2144875019788742, "signal/accuracy_reward/centered_abs_mean": 0.105511474609375, "signal/accuracy_reward/group_bin_occupancy": 0.177734375, "signal/accuracy_reward/group_std_mean": 0.1422753319144249, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0527557373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0527557373046875, "signal/advantage_abs_mean": 0.07274120301008224, "signal/advantage_pre_scale_abs_mean": 0.07274120301008224, "signal/advantage_pre_scale_std": 0.11103657335042953, "signal/advantage_std": 0.11103657335042953, "signal/brier_reward/centered_abs_mean": 0.11999978870153427, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.153327140212059, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014999973587691784, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014999973587691784, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025067355018109083, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728125, "signal/frontier_aurc_reward/group_std_mean": 0.004060426913201809, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4870566489407794e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4870566489407794e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1733368456363678, "signal/frontier_coverage_1/group_bin_occupancy": 0.865625, "signal/frontier_coverage_1/group_std_mean": 0.2196456164121628, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031027294229716063, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031027294229716063, "signal/frontier_coverage_10/centered_abs_mean": 0.1724557787179947, "signal/frontier_coverage_10/group_bin_occupancy": 0.865625, "signal/frontier_coverage_10/group_std_mean": 0.2185318350791931, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030869582667946817, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030869582667946817, "signal/frontier_coverage_15/centered_abs_mean": 0.15549071431159972, "signal/frontier_coverage_15/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_15/group_std_mean": 0.19710105359554292, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027832836378365753, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027832836378365753, "signal/frontier_coverage_20/centered_abs_mean": 0.09377928972244262, "signal/frontier_coverage_20/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_20/group_std_mean": 0.11911326348781585, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001678649242967367, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001678649242967367, "signal/frontier_coverage_25/centered_abs_mean": 0.05974511280655861, "signal/frontier_coverage_25/group_bin_occupancy": 0.9234375, "signal/frontier_coverage_25/group_std_mean": 0.07545108199119568, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010694375028833746, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010694375028833746, "signal/frontier_coverage_5/centered_abs_mean": 0.1733368456363678, "signal/frontier_coverage_5/group_bin_occupancy": 0.865625, "signal/frontier_coverage_5/group_std_mean": 0.2196456164121628, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031027294229716063, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031027294229716063, "signal/frontier_ece_reward/centered_abs_mean": 0.0038582887034863235, "signal/frontier_ece_reward/group_bin_occupancy": 0.576953125, "signal/frontier_ece_reward/group_std_mean": 0.004859161656349897, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00048228608793579044, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00048228608793579044, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.277313631772995, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.736328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35219224691390993, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034664203971624376, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034664203971624376, "step": 295 }, { "calibration/aurc": 0.31620081263607647, "calibration/batch_distribution_entropy": 0.9888025135738457, "calibration/batch_entropy_100bins": 0.9754164842245048, "calibration/batch_entropy_10bins": 0.9888025135738457, "calibration/batch_entropy_50bins": 0.9829728236087112, "calibration/batch_uniqueness": 0.9548370361328125, "calibration/buffer_distribution_entropy": 0.9990489479151083, "calibration/buffer_entropy_100bins": 0.9991271919139264, "calibration/buffer_entropy_10bins": 0.9990489479151083, "calibration/buffer_entropy_50bins": 0.9991661909354466, "calibration/confidence_entropy": 0.4907980481305323, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.14296875, "calibration/coverage@15%": 0.1796875, "calibration/coverage@20%": 0.319140625, "calibration/coverage@25%": 0.4140625, "calibration/coverage@30%": 0.48125, "calibration/coverage@5%": 0.07578125, "calibration/ece": 0.14081409067623374, "calibration/mean_confidence": 0.5085684695833261, "calibration/prompt_uniqueness": 0.84306640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 750.4, "completions/max_terminated_length": 527.8, "completions/mean_length": 230.4478515625, "completions/mean_terminated_length": 230.19272155761718, "completions/min_length": 121.6, "completions/min_terminated_length": 121.6, "epoch": 0.96, "grad_norm": 0.0007394987624138594, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 1028293331.0, "reward": 0.8490480065345765, "reward_std": 0.0807856947183609, "rewards/accuracy_reward": 0.5232421875, "rewards/brier_reward": 0.805585753917694, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002769366092979908, "rewards/frontier_coverage_1": 0.1345707431435585, "rewards/frontier_coverage_10": 0.13391480296850206, "rewards/frontier_coverage_15": 0.12401831150054932, "rewards/frontier_coverage_20": 0.08151039481163025, "rewards/frontier_coverage_25": 0.05505374222993851, "rewards/frontier_coverage_5": 0.13456859886646272, "rewards/frontier_ece_reward": 0.0024473052471876144, "rewards/frontier_entropy_batch_reward": -0.2024726927280426, "signal/accuracy_reward/centered_abs_mean": 0.07801513671875, "signal/accuracy_reward/group_bin_occupancy": 0.165625, "signal/accuracy_reward/group_std_mean": 0.10689240992069245, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039007568359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039007568359375, "signal/advantage_abs_mean": 0.0619741216301918, "signal/advantage_pre_scale_abs_mean": 0.0619741216301918, "signal/advantage_pre_scale_std": 0.09630223214626313, "signal/advantage_std": 0.09630223214626313, "signal/brier_reward/centered_abs_mean": 0.11064407974481583, "signal/brier_reward/group_bin_occupancy": 0.850390625, "signal/brier_reward/group_std_mean": 0.14350316524505616, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013830509968101978, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013830509968101978, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022713606245815753, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375, "signal/frontier_aurc_reward/group_std_mean": 0.0036464712116867303, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0657354111317547e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0657354111317547e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1542697876691818, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.19899408221244813, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027614288963377477, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027614288963377477, "signal/frontier_coverage_10/centered_abs_mean": 0.15353223979473113, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.19805727005004883, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002748226933181286, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002748226933181286, "signal/frontier_coverage_15/centered_abs_mean": 0.14255075454711913, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.18408787548542022, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025516584049910308, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025516584049910308, "signal/frontier_coverage_20/centered_abs_mean": 0.08376922607421874, "signal/frontier_coverage_20/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_20/group_std_mean": 0.10870558023452759, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014994690660387277, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014994690660387277, "signal/frontier_coverage_25/centered_abs_mean": 0.05670462995767593, "signal/frontier_coverage_25/group_bin_occupancy": 0.92265625, "signal/frontier_coverage_25/group_std_mean": 0.07235517054796219, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010150128742679953, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010150128742679953, "signal/frontier_coverage_5/centered_abs_mean": 0.15426267683506012, "signal/frontier_coverage_5/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_5/group_std_mean": 0.1989847391843796, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00276130186393857, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00276130186393857, "signal/frontier_ece_reward/centered_abs_mean": 0.003688620775938034, "signal/frontier_ece_reward/group_bin_occupancy": 0.5734375, "signal/frontier_ece_reward/group_std_mean": 0.004731486923992634, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00046107759699225425, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00046107759699225425, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26567680239677427, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34176658391952514, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033209600299596784, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033209600299596784, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4157009406435348, "eval_calibration/batch_distribution_entropy": 0.9144449702126978, "eval_calibration/batch_entropy_100bins": 0.6990601166122136, "eval_calibration/batch_entropy_10bins": 0.9144449702126978, "eval_calibration/batch_entropy_50bins": 0.7689095013084135, "eval_calibration/batch_uniqueness": 0.8984375, "eval_calibration/buffer_distribution_entropy": 0.9989272187954801, "eval_calibration/buffer_entropy_100bins": 0.9990825487296975, "eval_calibration/buffer_entropy_10bins": 0.9989272187954801, "eval_calibration/buffer_entropy_50bins": 0.9991098135800613, "eval_calibration/confidence_entropy": 0.48430149279421963, "eval_calibration/coverage@0%": 0.09375, "eval_calibration/coverage@1%": 0.09375, "eval_calibration/coverage@10%": 0.09375, "eval_calibration/coverage@15%": 0.1640625, "eval_calibration/coverage@20%": 0.2578125, "eval_calibration/coverage@25%": 0.296875, "eval_calibration/coverage@30%": 0.328125, "eval_calibration/coverage@5%": 0.09375, "eval_calibration/ece": 0.17053071360991148, "eval_calibration/mean_confidence": 0.4246910924677646, "eval_calibration/prompt_uniqueness": 0.8984375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 374.75, "eval_completions/max_terminated_length": 374.75, "eval_completions/mean_length": 230.63119506835938, "eval_completions/mean_terminated_length": 230.63119506835938, "eval_completions/min_length": 139.0, "eval_completions/min_terminated_length": 139.0, "eval_loss": 0.0, "eval_num_tokens": 1028293331.0, "eval_reward": 0.710255429148674, "eval_reward_std": 0.22263594716787338, "eval_rewards/accuracy_reward": 0.43359375, "eval_rewards/brier_reward": 0.8085650652647018, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0028918907046318054, "eval_rewards/frontier_coverage_1": 0.19964230805635452, "eval_rewards/frontier_coverage_10": 0.19866662845015526, "eval_rewards/frontier_coverage_15": 0.184698436409235, "eval_rewards/frontier_coverage_20": 0.1101516131311655, "eval_rewards/frontier_coverage_25": 0.0620901882648468, "eval_rewards/frontier_coverage_5": 0.1996377371251583, "eval_rewards/frontier_ece_reward": 0.0027777274372056127, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 20.2818, "eval_samples_per_second": 24.653, "eval_signal/accuracy_reward/centered_abs_mean": 0.47607421875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4955107420682907, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.238037109375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.238037109375, "eval_signal/advantage_abs_mean": 0.20791196078062057, "eval_signal/advantage_pre_scale_abs_mean": 0.20791196078062057, "eval_signal/advantage_pre_scale_std": 0.22018880769610405, "eval_signal/advantage_std": 0.22018880769610405, "eval_signal/brier_reward/centered_abs_mean": 0.18127229064702988, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.22954664751887321, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022659036330878735, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022659036330878735, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00344617961673066, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006800854112952948, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.168661366245942e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.168661366245942e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3764362931251526, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.4541953206062317, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006738209398463368, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006738209398463368, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.374702051281929, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.4522312879562378, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006707166787236929, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006707166787236929, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.34953027218580246, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_15/group_std_mean": 0.4235554412007332, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006256591761484742, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006256591761484742, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.19098489359021187, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_20/group_std_mean": 0.23890389502048492, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003418629406951368, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003418629406951368, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0917560514062643, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_25/group_std_mean": 0.11645574681460857, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016424332570750266, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016424332570750266, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3764154985547066, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.45417140424251556, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0067378373350948095, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0067378373350948095, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.005336694768629968, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875, "eval_signal/frontier_ece_reward/group_std_mean": 0.006381870131008327, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000667086846078746, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000667086846078746, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.197, "step": 300 }, { "calibration/aurc": 0.26110004370907136, "calibration/batch_distribution_entropy": 0.9663232191264044, "calibration/batch_entropy_100bins": 0.9602856930043288, "calibration/batch_entropy_10bins": 0.9663232191264044, "calibration/batch_entropy_50bins": 0.9700703532954462, "calibration/batch_uniqueness": 0.9515728882667596, "calibration/buffer_distribution_entropy": 0.9989411083119799, "calibration/buffer_entropy_100bins": 0.9990920989566723, "calibration/buffer_entropy_10bins": 0.9989411083119799, "calibration/buffer_entropy_50bins": 0.9991269827910889, "calibration/confidence_entropy": 0.5043553017329548, "calibration/coverage@0%": 0.03242340386497065, "calibration/coverage@1%": 0.03242340386497065, "calibration/coverage@10%": 0.19649201932485322, "calibration/coverage@15%": 0.3742263943248532, "calibration/coverage@20%": 0.47579424535225046, "calibration/coverage@25%": 0.5375168175146772, "calibration/coverage@30%": 0.6078407840019568, "calibration/coverage@5%": 0.08125152886497064, "calibration/ece": 0.1224501044294222, "calibration/mean_confidence": 0.5046927209883952, "calibration/prompt_uniqueness": 0.8447966290810353, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 923.2, "completions/max_terminated_length": 544.6, "completions/mean_length": 231.37236328125, "completions/mean_terminated_length": 230.86200561523438, "completions/min_length": 116.8, "completions/min_terminated_length": 116.8, "epoch": 0.976, "grad_norm": 0.0011766533134505153, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 1045523704.0, "reward": 0.8560734391212463, "reward_std": 0.08937492370605468, "rewards/accuracy_reward": 0.54404296875, "rewards/brier_reward": 0.8007814288139343, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0027466853614896538, "rewards/frontier_coverage_1": 0.11451977603137493, "rewards/frontier_coverage_10": 0.11409206595271826, "rewards/frontier_coverage_15": 0.10787402391433716, "rewards/frontier_coverage_20": 0.07268583029508591, "rewards/frontier_coverage_25": 0.052094388753175735, "rewards/frontier_coverage_5": 0.11450284756720067, "rewards/frontier_ece_reward": 0.0022714813821949065, "rewards/frontier_entropy_batch_reward": -0.21035043001174927, "signal/accuracy_reward/centered_abs_mean": 0.094769287109375, "signal/accuracy_reward/group_bin_occupancy": 0.1703125, "signal/accuracy_reward/group_std_mean": 0.1257157877087593, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0473846435546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0473846435546875, "signal/advantage_abs_mean": 0.06940693110227585, "signal/advantage_pre_scale_abs_mean": 0.06940693110227585, "signal/advantage_pre_scale_std": 0.10633570104837417, "signal/advantage_std": 0.10633570104837417, "signal/brier_reward/centered_abs_mean": 0.11308815479278564, "signal/brier_reward/group_bin_occupancy": 0.853125, "signal/brier_reward/group_std_mean": 0.1454048365354538, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014136019349098205, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014136019349098205, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0029782545287162067, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023753143846988677, "signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625, "signal/frontier_aurc_reward/group_std_mean": 0.003789714723825455, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.251812424627133e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.251812424627133e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16219232231378555, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.20617010891437532, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029032424092292784, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029032424092292784, "signal/frontier_coverage_10/centered_abs_mean": 0.16142708957195281, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.2052207589149475, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028895447496324776, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028895447496324776, "signal/frontier_coverage_15/centered_abs_mean": 0.1506349816918373, "signal/frontier_coverage_15/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_15/group_std_mean": 0.19179299771785735, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026963659562170505, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026963659562170505, "signal/frontier_coverage_20/centered_abs_mean": 0.08505127876996994, "signal/frontier_coverage_20/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_20/group_std_mean": 0.1089501440525055, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015224177855998277, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015224177855998277, "signal/frontier_coverage_25/centered_abs_mean": 0.05604914203286171, "signal/frontier_coverage_25/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_25/group_std_mean": 0.07139091566205025, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001003279653377831, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001003279653377831, "signal/frontier_coverage_5/centered_abs_mean": 0.16217613518238067, "signal/frontier_coverage_5/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_5/group_std_mean": 0.20614968240261078, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029029527213424444, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029029527213424444, "signal/frontier_ece_reward/centered_abs_mean": 0.0038645747117698193, "signal/frontier_ece_reward/group_bin_occupancy": 0.58828125, "signal/frontier_ece_reward/group_std_mean": 0.004852784611284733, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004830718389712274, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004830718389712274, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2707963943481445, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3448775112628937, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033849549293518064, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033849549293518064, "step": 305 }, { "calibration/aurc": 0.3658527640919357, "calibration/batch_distribution_entropy": 0.9803335563539537, "calibration/batch_entropy_100bins": 0.9691597495026512, "calibration/batch_entropy_10bins": 0.9803335563539537, "calibration/batch_entropy_50bins": 0.9781998927735055, "calibration/batch_uniqueness": 0.953106689453125, "calibration/buffer_distribution_entropy": 0.9989034102433759, "calibration/buffer_entropy_100bins": 0.9990778744277339, "calibration/buffer_entropy_10bins": 0.9989034102433759, "calibration/buffer_entropy_50bins": 0.9991183500383848, "calibration/confidence_entropy": 0.4943518091239916, "calibration/coverage@0%": 0.016015625, "calibration/coverage@1%": 0.016015625, "calibration/coverage@10%": 0.06015625, "calibration/coverage@15%": 0.10859375, "calibration/coverage@20%": 0.154296875, "calibration/coverage@25%": 0.197265625, "calibration/coverage@30%": 0.398046875, "calibration/coverage@5%": 0.020703125, "calibration/ece": 0.1389597168024852, "calibration/mean_confidence": 0.46682059475391463, "calibration/prompt_uniqueness": 0.8416015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.6, "completions/max_terminated_length": 463.6, "completions/mean_length": 231.00029296875, "completions/mean_terminated_length": 231.00029296875, "completions/min_length": 127.2, "completions/min_terminated_length": 127.2, "epoch": 0.992, "grad_norm": 0.0008861988899298012, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 1063017627.0, "reward": 0.8441213011741638, "reward_std": 0.08615548759698868, "rewards/accuracy_reward": 0.52431640625, "rewards/brier_reward": 0.8000134348869323, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002819360885769129, "rewards/frontier_coverage_1": 0.1251816540956497, "rewards/frontier_coverage_10": 0.12461385577917099, "rewards/frontier_coverage_15": 0.11719222217798234, "rewards/frontier_coverage_20": 0.07521957084536553, "rewards/frontier_coverage_25": 0.05361300930380821, "rewards/frontier_coverage_5": 0.1251749500632286, "rewards/frontier_ece_reward": 0.0021149621577933432, "rewards/frontier_entropy_batch_reward": -0.2345559537410736, "signal/accuracy_reward/centered_abs_mean": 0.082110595703125, "signal/accuracy_reward/group_bin_occupancy": 0.166796875, "signal/accuracy_reward/group_std_mean": 0.11262907832860947, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0410552978515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0410552978515625, "signal/advantage_abs_mean": 0.06661349236965179, "signal/advantage_pre_scale_abs_mean": 0.06661349236965179, "signal/advantage_pre_scale_std": 0.1012403666973114, "signal/advantage_std": 0.1012403666973114, "signal/brier_reward/centered_abs_mean": 0.10932144820690155, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.13959217667579651, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013665181025862694, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013665181025862694, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023461440578103065, "signal/frontier_aurc_reward/group_bin_occupancy": 0.721875, "signal/frontier_aurc_reward/group_std_mean": 0.0038652042858302593, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1995976789621634e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1995976789621634e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15163334608078002, "signal/frontier_coverage_1/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_1/group_std_mean": 0.19423333406448365, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027142366860061886, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027142366860061886, "signal/frontier_coverage_10/centered_abs_mean": 0.15079353153705596, "signal/frontier_coverage_10/group_bin_occupancy": 0.865625, "signal/frontier_coverage_10/group_std_mean": 0.1931879073381424, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026992041151970626, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026992041151970626, "signal/frontier_coverage_15/centered_abs_mean": 0.1394236296415329, "signal/frontier_coverage_15/group_bin_occupancy": 0.859375, "signal/frontier_coverage_15/group_std_mean": 0.17897272109985352, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002495682844892144, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002495682844892144, "signal/frontier_coverage_20/centered_abs_mean": 0.07920315265655517, "signal/frontier_coverage_20/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_20/group_std_mean": 0.10239728689193725, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014177364064380527, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014177364064380527, "signal/frontier_coverage_25/centered_abs_mean": 0.05471629798412323, "signal/frontier_coverage_25/group_bin_occupancy": 0.92109375, "signal/frontier_coverage_25/group_std_mean": 0.07026181817054748, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009794216603040695, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009794216603040695, "signal/frontier_coverage_5/centered_abs_mean": 0.1516157403588295, "signal/frontier_coverage_5/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_5/group_std_mean": 0.1942117065191269, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713921666145325, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713921666145325, "signal/frontier_ece_reward/centered_abs_mean": 0.003750180173665285, "signal/frontier_ece_reward/group_bin_occupancy": 0.585546875, "signal/frontier_ece_reward/group_std_mean": 0.0047975870780646804, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004687725217081606, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004687725217081606, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29427412152290344, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.727734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36567636132240294, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03678426519036293, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03678426519036293, "step": 310 }, { "calibration/aurc": 0.27186836607159154, "calibration/batch_distribution_entropy": 0.9591700570502693, "calibration/batch_entropy_100bins": 0.9570191564453638, "calibration/batch_entropy_10bins": 0.9591700570502693, "calibration/batch_entropy_50bins": 0.963697706521873, "calibration/batch_uniqueness": 0.9496231079101562, "calibration/buffer_distribution_entropy": 0.9989876036497771, "calibration/buffer_entropy_100bins": 0.9991295514741796, "calibration/buffer_entropy_10bins": 0.9989876036497771, "calibration/buffer_entropy_50bins": 0.9991822719110717, "calibration/confidence_entropy": 0.48186544687195904, "calibration/coverage@0%": 0.02734375, "calibration/coverage@1%": 0.02734375, "calibration/coverage@10%": 0.0556640625, "calibration/coverage@15%": 0.08203125, "calibration/coverage@20%": 0.2568359375, "calibration/coverage@25%": 0.587890625, "calibration/coverage@30%": 0.7333984375, "calibration/coverage@5%": 0.0341796875, "calibration/ece": 0.16727483770900242, "calibration/mean_confidence": 0.5957875381649225, "calibration/prompt_uniqueness": 0.8248291015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.5, "completions/max_terminated_length": 421.5, "completions/mean_length": 230.24088287353516, "completions/mean_terminated_length": 230.24088287353516, "completions/min_length": 128.5, "completions/min_terminated_length": 128.5, "epoch": 0.9984, "num_tokens": 1069964559.0, "reward": 0.8514951169490814, "reward_std": 0.09036770090460777, "rewards/accuracy_reward": 0.549072265625, "rewards/brier_reward": 0.7759084403514862, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002963867736980319, "rewards/frontier_coverage_1": 0.07437552884221077, "rewards/frontier_coverage_10": 0.07389985024929047, "rewards/frontier_coverage_15": 0.06876883283257484, "rewards/frontier_coverage_20": 0.04556947015225887, "rewards/frontier_coverage_25": 0.03713721036911011, "rewards/frontier_coverage_5": 0.0743844173848629, "rewards/frontier_ece_reward": 0.001525860745459795, "rewards/frontier_entropy_batch_reward": -0.21491432189941406, "signal/accuracy_reward/centered_abs_mean": 0.0856781005859375, "signal/accuracy_reward/group_bin_occupancy": 0.1708984375, "signal/accuracy_reward/group_std_mean": 0.11928322166204453, "signal/accuracy_reward/group_zero_std_frac": 0.6328125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04283905029296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04283905029296875, "signal/advantage_abs_mean": 0.0704660713672638, "signal/advantage_pre_scale_abs_mean": 0.0704660713672638, "signal/advantage_pre_scale_std": 0.10723469033837318, "signal/advantage_std": 0.10723469033837318, "signal/brier_reward/centered_abs_mean": 0.11325568333268166, "signal/brier_reward/group_bin_occupancy": 0.880859375, "signal/brier_reward/group_std_mean": 0.1432887762784958, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014156960416585207, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014156960416585207, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002491934224963188, "signal/frontier_aurc_reward/group_bin_occupancy": 0.736328125, "signal/frontier_aurc_reward/group_std_mean": 0.0038100657984614372, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.460562558961101e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.460562558961101e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14216963946819305, "signal/frontier_coverage_1/group_bin_occupancy": 0.8759765625, "signal/frontier_coverage_1/group_std_mean": 0.1839248612523079, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025448364904150367, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025448364904150367, "signal/frontier_coverage_10/centered_abs_mean": 0.14118493348360062, "signal/frontier_coverage_10/group_bin_occupancy": 0.8779296875, "signal/frontier_coverage_10/group_std_mean": 0.18262220919132233, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002527210279367864, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002527210279367864, "signal/frontier_coverage_15/centered_abs_mean": 0.13103638216853142, "signal/frontier_coverage_15/group_bin_occupancy": 0.8720703125, "signal/frontier_coverage_15/group_std_mean": 0.16926757991313934, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002345551154576242, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002345551154576242, "signal/frontier_coverage_20/centered_abs_mean": 0.07273482158780098, "signal/frontier_coverage_20/group_bin_occupancy": 0.8798828125, "signal/frontier_coverage_20/group_std_mean": 0.09434954449534416, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00130195333622396, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00130195333622396, "signal/frontier_coverage_25/centered_abs_mean": 0.048997994512319565, "signal/frontier_coverage_25/group_bin_occupancy": 0.921875, "signal/frontier_coverage_25/group_std_mean": 0.06378625705838203, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008770640706643462, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008770640706643462, "signal/frontier_coverage_5/centered_abs_mean": 0.1421535238623619, "signal/frontier_coverage_5/group_bin_occupancy": 0.8759765625, "signal/frontier_coverage_5/group_std_mean": 0.18390395492315292, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025445478968322277, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025445478968322277, "signal/frontier_ece_reward/centered_abs_mean": 0.00375261134468019, "signal/frontier_ece_reward/group_bin_occupancy": 0.609375, "signal/frontier_ece_reward/group_std_mean": 0.0048857699148356915, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00046907641808502376, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00046907641808502376, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28836265206336975, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7119140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3610518276691437, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03604533150792122, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03604533150792122, "step": 312, "total_flos": 0.0, "train_loss": 0.0038806597355384533, "train_runtime": 60786.4201, "train_samples_per_second": 0.329, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1069964559, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }